<?xml version='1.0' encoding='UTF-8'  ?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://purl.org/rss/1.0/" xmlns:dc="http://purl.org/dc/elements/1.1/">	<channel rdf:about="http://www.semanlink.net/tag/ml_google">		<title>AI@Google</title>		<link>http://www.semanlink.net/tag/ml_google</link>		<description>Documents tagged with AI@Google</description>		<items>			<rdf:Seq>							<rdf:li resource="http://www.semanlink.net/doc/2025/11/introduction_to_agents_%7C_kaggle"/>				<rdf:li resource="http://www.semanlink.net/doc/2025/08/github_google_langextract_a_"/>				<rdf:li resource="http://www.semanlink.net/doc/2025/07/ettin_suite_sota_paired_encode"/>				<rdf:li resource="http://www.semanlink.net/doc/2025/06/agent_development_kit"/>				<rdf:li resource="http://www.semanlink.net/doc/2025/06/google_adk_and_anthropic_mcp_a"/>				<rdf:li resource="http://www.semanlink.net/doc/2025/05/lightonai_reason_moderncolbert_"/>				<rdf:li resource="http://www.semanlink.net/doc/2025/05/2503_05500_eurobert_scaling_"/>				<rdf:li resource="http://www.semanlink.net/doc/2025/05/lighton_releases_gte_moderncolb"/>				<rdf:li resource="http://www.semanlink.net/doc/2025/02/benjamin_clavie_sur_x_what_i"/>				<rdf:li resource="http://www.semanlink.net/doc/2025/01/matthewberman_sur_x_titans_"/>				<rdf:li resource="http://www.semanlink.net/doc/2025/01/jack_morris_sur_x_%F0%9F%93%A2_new_sta"/>				<rdf:li resource="http://www.semanlink.net/doc/2025/01/nomic_ai_modernbert_embed_base_"/>				<rdf:li resource="http://www.semanlink.net/doc/2024/12/2412_13663_smarter_better_f"/>				<rdf:li resource="http://www.semanlink.net/doc/2024/12/jeremy_howard_sur_x_i_ll_get"/>				<rdf:li resource="http://www.semanlink.net/doc/2024/10/how_to_build_a_custom_text_clas"/>				<rdf:li resource="http://www.semanlink.net/doc/2024/10/alphafold_3_le_logiciel_phare_"/>				<rdf:li resource="http://www.semanlink.net/doc/2024/10/le_prix_nobel_de_chimie_2024_es"/>		
		<rdf:li resource="http://www.semanlink.net/doc/2024/08/rohan_paul_sur_x_rag_or_long"/>				<rdf:li resource="http://www.semanlink.net/doc/2024/05/2405_05904_does_fine_tuning_l"/>				<rdf:li resource="http://www.semanlink.net/doc/2024/04/llamaindex_%F0%9F%A6%99_sur_x_fine_tun"/>				<rdf:li resource="http://www.semanlink.net/doc/2024/04/2404_11018_many_shot_in_conte"/>				<rdf:li resource="http://www.semanlink.net/doc/2024/02/2307_15936_a_theory_for_emerg"/>				<rdf:li resource="http://www.semanlink.net/doc/2024/02/jeff_dean_%F0%9F%8F%A1_sur_x_gemini"/>				<rdf:li resource="http://www.semanlink.net/doc/2024/01/an_efficient_long_text_semantic"/>				<rdf:li resource="http://www.semanlink.net/doc/2024/01/rachit_bansal_sur_x_extendin"/>				<rdf:li resource="http://www.semanlink.net/doc/2024/01/maarten_grootendorst_sur_x_b"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/12/ukp_lab_sur_x_need_a_lightwe"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/10/rethinking_query_expansion_for_"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/09/maarten_grootendorst_sur_x_i"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/09/getting_started_with_deepmatche"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/08/2002_06275_twinbert_distilli"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/08/modular_and_parameter_efficient"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/07/sparseembed_learning_sparse_le"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/07/2305_14128_dr_icl_demonstrat"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/06/generative_ai_support_on_vertex"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/06/daniel_daza_sur_twitter_new_"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/05/2305_11778_cross_lingual_supe"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/05/peter_j_liu_sur_twitter_her"/>				<rdf:li 
resource="http://www.semanlink.net/doc/2023/05/2305_06897_afriqa_cross_ling"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/05/google_ai_palm_2_google_ai"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/05/google_teases_project_tailwind_"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/05/skeskinen_bert_cpp_ggml_implem"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/05/document_ai_%7C_google_for_deve"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/05/niels_rogge_sur_twitter_made"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/05/google_we_have_no_moat_and_ne"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/04/aran_komatsuzaki_sur_twitter__3"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/04/2303_16839_mammut_a_simple_a"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/04/domain_adaptation_with_generati"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/04/classifying_long_textual_docume"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/04/diffusion_language_models_san"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/04/daniel_vila_suero_sur_twitter_"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/04/2304_01982_rethinking_the_rol"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/03/niels_rogge_sur_twitter_exci"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/03/enabling_python_virtualenv_in_j"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/02/2112_05682_self_attention_doe"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/02/2108_08877_sentence_t5_scala"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/02/maarten_grootendorst_sur_twitte"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/02/jim_fan_sur_twitter_do_you_k"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/02/guiding_frozen_language_models_"/>				<rdf:li 
resource="http://www.semanlink.net/doc/2023/02/2203_14465_star_bootstrappin"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/02/google_announces_chatgpt_rival_"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/02/ramsri_goutham_golla_sur_twitte"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/02/the_flan_collection_advancing_"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/02/shayne_longpre_sur_twitter_w"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/01/creer_un_notebook_jupyterlab_ve"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/01/5_steps_to_go_from_a_notebook_t"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/01/lamda_our_breakthrough_convers"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/01/an_empirical_analysis_of_comput"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/01/characterizing_emergent_phenome"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/01/2301_08210_everything_is_conn"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/01/multilingual_sentence_transform"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/01/alphafold%E2%80%99s_new_rival_meta_ai_"/>				<rdf:li resource="http://www.semanlink.net/doc/2023/01/andrej_karpathy_sur_twitter_"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/12/rohan_anil_sur_twitter_next_"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/12/valueerror_invalid_literal_for"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/11/few_shot_text_classification_c"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/11/one_of_the_biggest_problems_in_"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/10/2202_06991_transformer_memory"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/10/tutorial_on_uncertainty_estimat"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/10/stephanie_chan_sur_twitter_t"/>				<rdf:li 
resource="http://www.semanlink.net/doc/2022/10/lewis_tunstall_sur_twitter_t"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/10/huggingface_setfit_efficient_f"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/10/santiago_sur_twitter_if_you_"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/10/maartengr_keybert_minimal_keyw"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/10/yi_tay_sur_twitter_don_t_ret"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/09/2205_11498_domain_adaptation_"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/09/2209_11055_efficient_few_shot"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/09/google_ai_blog_tensorstore_for"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/09/promptbert_improving_bert_sente"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/09/2201_04337_promptbert_improv"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/09/prompt_tuning_bert%F0%9F%8E%AF_commonlit_"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/09/active_learning_for_bert_an_em"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/09/2106_10199_bitfit_simple_par"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/08/on_stability_of_few_sample_tran"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/08/unsupervised_learning_sentenc"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/08/train_and_fine_tune_sentence_tr"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/07/2205_00820_entity_aware_trans"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/07/leshem_choshen_sur_twitter_c"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/07/2206_10658_questions_are_all_"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/06/unveiling_transformers_with_leg"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/06/using_bert_for_classifying_docu"/>				<rdf:li 
resource="http://www.semanlink.net/doc/2022/06/chris_olah_sur_twitter_i_m_e"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/06/google_ai_blog_limoe_learning"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/06/sentence_bert_model_in_onnx_for"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/06/2205_15952_knowledge_graph_"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/06/domain_transfer_with_ggpl_germ"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/06/nils_reimers_sur_twitter_gpl"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/05/2205_08184_skill_structured_"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/05/2205_05131_unifying_language_"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/05/bertopic_the_future_of_topic_m"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/05/2205_04260_ease_entity_aware"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/05/2203_08913_memorizing_transfo"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/04/ramsri_goutham_golla_sur_twitte"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/04/1909_00426_global_entity_disa"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/04/2110_08151_mluke_the_power_o"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/04/ikuya_yamada_sur_twitter_is_"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/04/tu_vu_sur_twitter_enormous_l"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/04/google_ai_blog_pathways_langua"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/04/2008_11228_a_simple_method_fo"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/03/2004_05119_beyond_fine_tuning"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/03/sentence_transformer_fine_tunin"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/03/nils_reimers_sur_twitter_gre"/>				<rdf:li 
resource="http://www.semanlink.net/doc/2022/03/sentence_embedding_fine_tuning_"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/03/domain_adaptation_sentence_tr"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/03/2203_14655_few_shot_learning_"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/03/2006_05987_revisiting_few_sam"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/03/nlp_%7C_how_to_add_a_domain_speci"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/03/studio_ousia_sur_twitter_now"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/03/andrew_trask_about_large_langua"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/03/maartengr_bertopic_leveraging_"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/03/document_matching_for_job_descr"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/03/naver_labs_europe_nils_reim"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/02/2109_06304_phrase_bert_impro"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/02/nils_reimers_sur_twitter_cre"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/02/nils_reimers_sur_twitter_how"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/02/sentence_transformers_fast_clus"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/01/gsarti_scibert_nli_%C2%B7_hugging_fa"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/01/semantic_search_sentence_tran"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/01/1906_00300_latent_retrieval_f"/>				<rdf:li resource="http://www.semanlink.net/doc/2022/01/domain_transfer_with_bert_%7C_pin"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/12/anthropic_sur_twitter_a_math"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/12/making_the_most_of_data_augmen"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/12/using_pretrained_sbert_model_in"/>				<rdf:li 
resource="http://www.semanlink.net/doc/2021/12/advance_bert_model_via_transfer"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/12/2112_07577_gpl_generative_ps"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/12/improving_language_models_by_re"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/12/semantic_search_through_a_vecto"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/12/unsupervised_extractive_summari"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/11/unsupervised_training_for_sente"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/11/how_to_fine_tune_sentence_bert_"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/11/multilingual_sentence_transform"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/11/mixed_negative_sampling_for_lea"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/11/train_embeddings_by_using_the_t"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/10/on_the_stability_of_fine_tuning"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/10/next_gen_sentence_embeddings_wi"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/10/sentence_embeddings_and_transfo"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/10/alphafold_2_is_here_what%E2%80%99s_beh"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/10/l%E2%80%99intelligence_artificielle_ge"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/10/sahajtomar_french_semantic_%C2%B7_hu"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/10/omer_levy_sur_twitter_what_i"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/10/google_ai_blog_exploring_trans"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/09/2106_04647_compacter_efficie"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/09/2010_12566_dict_mlm_improved"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/07/google_ai_blog_from_vision_to_"/>				<rdf:li 
resource="http://www.semanlink.net/doc/2021/06/2102_07043_reasoning_over_vir"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/06/semantic_search_with_s_bert_is_"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/05/making_sense_of_raw_input"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/05/1906_03158_matching_the_blank"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/05/1909_10506_learning_dense_rep"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/04/nils_reimers_sur_twitter_sbe"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/04/2011_05864_on_the_sentence_em"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/04/simcse_simple_contrastive_lear"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/04/nils_reimers_sur_twitter_new"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/04/2007_12603_ir_bert_leveragin"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/04/2007_15779_domain_specific_la"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/04/1902_00751_parameter_efficien"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/04/exbert_extending_pre_trained_m"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/04/1910_02227_making_sense_of_se"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/03/1901_04085_passage_re_ranking"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/03/sentencetransformers_documentat"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/03/rodrigo_nogueira_sur_twitter_"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/02/zero_shot_learning_in_modern_nl"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/02/kamalkraj_bert_ner_pytorch_nam"/>				<rdf:li resource="http://www.semanlink.net/doc/2021/01/1911_03681_e_bert_efficient_"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/12/google_tapas_base_finetuned_wtq"/>				<rdf:li 
resource="http://www.semanlink.net/doc/2020/12/2009_02835_e_bert_a_phrase_a"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/12/2002_08909_realm_retrieval_a"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/12/supporting_content_decision_mak"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/12/google_ai_blog_reformer_the_e"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/12/keyword_extraction_with_bert_%7C_"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/12/salmon_run_word_sense_disambig"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/12/domain_specific_bert_models_%C2%B7_c"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/11/2010_01057_luke_deep_context"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/11/raphaelsty_ckb_contextual_know"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/11/2010_03496_inductive_entity_r"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/10/which_flavor_of_bert_should_you"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/10/2010_00402_from_trees_to_cont"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/08/google_ai_blog_realm_integrat"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/07/ukplab_sentence_transformers_s"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/07/how_to_use_bert_for_finding_sim"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/07/2004_07202_entities_as_expert"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/07/2007_00849_facts_as_experts_"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/07/bert_word_embeddings_tutorial_%C2%B7"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/07/learning_to_tag_oov_tokens_by_i"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/06/patrick_von_platen_sur_twitter_"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/06/2001_04451_reformer_the_effi"/>				<rdf:li 
resource="http://www.semanlink.net/doc/2020/06/representation_learning_for_inf"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/06/google_ai_blog_extracting_stru"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/06/1804_03235_large_scale_distri"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/06/on_word_embeddings"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/05/1909_04164_knowledge_enhanced"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/05/phd_thesis_deep_learning_with_"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/05/1911_03814_zero_shot_entity_l"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/03/bert_elmo_gpt_2_how_contex"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/03/_1909_03193_kg_bert_bert_for_"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/03/_1909_07606_k_bert_enabling_l"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/03/unsupervised_ner_using_bert_h"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/02/_2002_12327_a_primer_in_bertol"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/02/_2002_11402_detecting_potentia"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/02/distilling_bert_models_with_spa"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/02/hugging_face_sur_twitter_to_"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/02/how_much_knowledge_can_you_pack"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/02/adam_roberts_sur_twitter_new"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/02/_1911_05507_compressive_transf"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/02/a_new_model_and_dataset_for_lon"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/02/_2002_02925_bert_of_theseus_c"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/02/canwen_xu_sur_twitter_wtf_w"/>				<rdf:li 
resource="http://www.semanlink.net/doc/2020/01/_1503_03832_facenet_a_unified"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/01/paris_nlp_season_4_meetup_3_"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/01/semantic_text_matching_for_long"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/01/building_a_search_engine_with_b"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/01/elasticsearch_meets_bert_build"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/01/nlp_s_clever_hans_moment_has_ar"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/01/investigating_entity_knowledge_"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/01/named_entity_recognition_with_b"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/01/_1902_10909_bert_for_joint_int"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/01/richer_sentence_embeddings_usin"/>				<rdf:li resource="http://www.semanlink.net/doc/2020/01/lecture_14_contextual_vectors"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/11/artificial_human_intelligence_"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/11/_1807_00082_amanuensis_the_pr"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/11/camembert"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/11/_1911_01464_emerging_cross_lin"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/10/bert_is_now_part_of_google_sear"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/10/restoring_ancient_text_using_de"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/10/language_and_perception_in_deep"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/10/meet_albert_a_new_%E2%80%98lite_bert%E2%80%99_"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/09/evolution_of_representations_in"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/09/introducing_neural_structured_l"/>				<rdf:li 
resource="http://www.semanlink.net/doc/2019/08/smaller_faster_cheaper_light"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/08/_1908_10084_sentence_bert_sen"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/08/_1808_02590_a_tutorial_on_netw"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/08/watch_your_step_learning_node_"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/08/_1905_07129_ernie_enhanced_la"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/08/what_is_xlnet_and_why_it_outper"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/07/a2n_attending_to_neighbors_for"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/07/bert_s_success_in_some_benchmar"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/07/_1907_07355_probing_neural_net"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/06/google_ai_blog_harnessing_orga"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/06/_1906_04341_what_does_bert_loo"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/06/nlp_contextualized_word_embedd"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/06/hamiltonian_neural_networks"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/06/_1906_02715_visualizing_and_me"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/06/_i_made_a_bet_that_a_naive_baye"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/05/introducing_fastbert_a_simple"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/05/robust_language_representation_"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/05/_1905_05950_bert_rediscovers_t"/>				<rdf:li resource="https://arxiv.org/abs/1904.08398"/>				<rdf:li resource="https://arxiv.org/abs/1803.02893"/>				<rdf:li resource="http://www.offconvex.org/2019/03/19/CURL/"/>				<rdf:li resource="https://github.com/huggingface/pytorch-pretrained-BERT"/>				<rdf:li 
resource="https://twitter.com/fchollet/status/1105139360226140160"/>				<rdf:li resource="https://arxiv.org/abs/1901.11504"/>				<rdf:li resource="https://blog.insightdatascience.com/using-bert-for-state-of-the-art-pre-training-for-natural-language-processing-1d87142c29e7"/>				<rdf:li resource="https://nlp.stanford.edu/seminar/details/jdevlin.pdf"/>				<rdf:li resource="https://www.zdnet.com/article/google-explores-ais-mysterious-polytope/"/>				<rdf:li resource="http://www.semanlink.net/doc/2019/02/keywords2vec"/>				<rdf:li resource="https://www.lemonde.fr/pixels/article/2019/02/06/intelligence-artificielle-deepmind-s-interesse-au-jeu-de-cartes-francais-hanabi_5420186_4408996.html"/>				<rdf:li resource="https://nlpparis.files.wordpress.com/2019/01/hyperlex_meetup23011.pdf"/>				<rdf:li resource="https://medium.com/data-from-the-trenches/training-cutting-edge-neural-networks-with-tensor2tensor-and-10-lines-of-code-10973c030b8"/>				<rdf:li resource="https://twitter.com/dpkingma/status/1070856305831624704"/>				<rdf:li resource="https://openreview.net/forum?id=S1HlA-ZAZ"/>				<rdf:li resource="https://jalammar.github.io/illustrated-bert/"/>				<rdf:li resource="https://ai.googleblog.com/2018/10/google-at-emnlp-2018.html"/>				<rdf:li resource="https://ai.googleblog.com/2018/11/open-sourcing-bert-state-of-art-pre.html"/>				<rdf:li resource="https://github.com/google-research/bert"/>				<rdf:li resource="https://aclanthology.coli.uni-saarland.de/papers/D18-1092/d18-1092"/>				<rdf:li resource="https://sermanet.github.io/imitate/"/>				<rdf:li resource="https://twitter.com/TensorFlow/status/1055538593941409792"/>				<rdf:li resource="https://arxiv.org/abs/1703.03129"/>				<rdf:li resource="https://arxiv.org/abs/1810.04805"/>				<rdf:li resource="https://js.tensorflow.org/"/>				<rdf:li resource="https://colab.research.google.com/github/tensorflow/tpu/blob/master/tools/colab/fashion_mnist.ipynb"/>				<rdf:li 
resource="https://www.quora.com/For-what-tasks-is-Pytorch-preferable-to-Tensorflow"/>				<rdf:li resource="https://medium.com/@faizanmukardam/simple-guide-to-neural-arithmetic-logic-units-nalu-explanation-intuition-and-code-64bc22605712"/>				<rdf:li resource="https://arxiv.org/abs/1807.03748"/>				<rdf:li resource="https://www.tensorflow.org/extras/candidate_sampling.pdf"/>				<rdf:li resource="https://stats.stackexchange.com/questions/244616/how-sampling-works-in-word2vec-can-someone-please-make-me-understand-nce-and-ne/245452#245452"/>				<rdf:li resource="https://arxiv.org/abs/1806.01261"/>				<rdf:li resource="https://cloud.google.com/tpu/"/>				<rdf:li resource="https://arxiv.org/abs/1803.11175"/>				<rdf:li resource="https://www.tensorflow.org/hub/modules/google/universal-sentence-encoder-large/1"/>				<rdf:li resource="https://guillaumegenthial.github.io/testing.html"/>				<rdf:li resource="https://towardsdatascience.com/how-to-use-dataset-in-tensorflow-c758ef9e4428"/>				<rdf:li resource="http://ruder.io/text-classification-tensorflow-estimators/"/>				<rdf:li resource="https://deepmind.com/blog/learning-to-generate-images/"/>				<rdf:li resource="https://arxiv.org/abs/1803.05651"/>				<rdf:li resource="https://github.com/anvaka/word2vec-graph"/>				<rdf:li resource="http://www.codesofinterest.com/2017/08/bottleneck-features-multi-class-classification-keras.html"/>				<rdf:li resource="https://github.com/tensorflow/models"/>				<rdf:li resource="https://medium.com/scaleabout/a-gentle-introduction-to-doc2vec-db3e8c0cce5e"/>				<rdf:li resource="https://www.quora.com/How-does-doc2vec-represent-feature-vector-of-a-document-Can-anyone-explain-mathematically-how-the-process-is-done/answer/Piyush-Bhardwaj-7"/>				<rdf:li resource="https://arxiv.org/abs/1710.04099"/>				<rdf:li resource="https://arxiv.org/abs/1712.09405"/>				<rdf:li 
resource="https://github.com/RaRe-Technologies/gensim/blob/c971411c09773488dbdd899754537c0d1a9fce50/docs/notebooks/WMD_tutorial.ipynb"/>				<rdf:li resource="http://learningsys.org/nips17/assets/slides/dean-nips17.pdf"/>				<rdf:li resource="https://arxiv.org/abs/1712.01208v1"/>				<rdf:li resource="http://www.wildml.com/2015/12/implementing-a-cnn-for-text-classification-in-tensorflow/"/>				<rdf:li resource="https://www.kaggle.com/cpmpml/spell-checker-using-word2vec?scriptVersionId=1152488"/>				<rdf:li resource="https://www.kaggle.com/lystdo/lstm-with-word2vec-embeddings"/>				<rdf:li resource="http://ben.bolte.cc/blog/2016/gensim.html"/>				<rdf:li resource="http://adventuresinmachinelearning.com/recurrent-neural-networks-lstm-tutorial-tensorflow/"/>				<rdf:li resource="http://adventuresinmachinelearning.com/word2vec-keras-tutorial/"/>				<rdf:li resource="https://www.tensorflow.org/install/install_mac"/>				<rdf:li resource="https://deepmind.com/blog/alphago-zero-learning-scratch/"/>				<rdf:li resource="http://www.lemonde.fr/pixels/article/2017/10/18/intelligence-artificielle-toujours-plus-puissant-alphago-apprend-desormais-sans-donnees-humaines_5202931_4408996.html"/>				<rdf:li resource="http://nicodjimenez.github.io/2017/10/08/tensorflow.html"/>				<rdf:li resource="https://web.stanford.edu/class/cs276/handouts/lecture20-distributed-representations.pdf"/>				<rdf:li resource="https://github.com/tensorflow/nmt"/>				<rdf:li resource="http://mccormickml.com/2016/04/27/word2vec-resources/"/>				<rdf:li resource="http://mccormickml.com/2017/01/11/word2vec-tutorial-part-2-negative-sampling/"/>				<rdf:li resource="http://mccormickml.com/2016/04/19/word2vec-tutorial-the-skip-gram-model/"/>				<rdf:li resource="https://www.quora.com/How-does-word2vec-work-Can-someone-walk-through-a-specific-example"/>				<rdf:li resource="https://www.tensorflow.org/tutorials/word2vec"/>				<rdf:li resource="https://arxiv.org/pdf/1507.07998.pdf"/>				<rdf:li 
resource="http://blog.aylien.com/overview-word-embeddings-history-word2vec-cbow-glove/"/>				<rdf:li resource="https://www.kaggle.com/c/word2vec-nlp-tutorial/details/part-3-more-fun-with-word-vectors"/>				<rdf:li resource="https://www.quora.com/Can-I-use-word2vec-representation-to-train-a-weka-classifier"/>				<rdf:li resource="https://www.quora.com/Can-I-use-word2vec-to-train-a-machine-learning-classifier"/>				<rdf:li resource="http://fauconnier.github.io/"/>				<rdf:li resource="https://arxiv.org/abs/1405.4053"/>				<rdf:li resource="https://www.analyticsvidhya.com/blog/2017/06/word-embeddings-count-word2veec/"/>				<rdf:li resource="https://github.com/3Top/word2vec-api"/>				<rdf:li resource="https://radimrehurek.com/gensim/models/word2vec.html"/>				<rdf:li resource="https://rare-technologies.com/word2vec-tutorial/"/>				<rdf:li resource="https://www.quora.com/Are-there-any-more-modern-alternatives-to-word2vec"/>				<rdf:li resource="https://transacl.org/ojs/index.php/tacl/article/view/582/158"/>				<rdf:li resource="http://stackoverflow.com/questions/32979254/using-word2vec-for-topic-modeling"/>				<rdf:li resource="http://nadbordrozd.github.io/blog/2016/05/20/text-classification-with-word2vec/"/>				<rdf:li resource="http://clic.cimec.unitn.it/marco/publications/acl2014/baroni-etal-countpredict-acl2014.pdf"/>				<rdf:li resource="https://www.quora.com/How-is-GloVe-different-from-word2vec"/>				<rdf:li resource="https://www.newscientist.com/article/2110522-googles-neural-networks-invent-their-own-encryption/"/>				<rdf:li resource="http://fgiasson.com/blog/index.php/2016/09/28/using-cognonto-to-generate-domain-specific-word2vec-models/"/>				<rdf:li resource="https://cloud.google.com/blog/big-data/2016/07/understanding-neural-networks-with-tensorflow-playground"/>				<rdf:li resource="http://www.theguardian.com/world/2016/mar/13/go-humans-lee-sedol-scores-first-victory-against-supercomputer"/>				<rdf:li 
resource="http://www.wired.com/2016/03/sadness-beauty-watching-googles-ai-play-go"/>				<rdf:li resource="http://www.lab41.org/anything2vec/"/>				<rdf:li resource="http://deeplearning4j.org/word2vec.html"/>				<rdf:li resource="http://opiateforthemass.es/articles/mini-ai-app-using-tensorflow-and-shiny/"/>				<rdf:li resource="http://arxiv.org/pdf/1301.3781.pdf"/>				<rdf:li resource="http://www.nature.com/news/game-playing-software-holds-lessons-for-neuroscience-1.16979"/>				<rdf:li resource="http://robohub.org/how-friendly-is-your-ai-it-depends-on-the-rewards/"/>				<rdf:li resource="http://www.kdnuggets.com/2015/12/tensor-flow-terrific-deep-learning-library.html"/>				<rdf:li resource="https://bcomposes.wordpress.com/2015/11/26/simple-end-to-end-tensorflow:-examples/?utm_content=buffer46554&amp;utm_medium=social&amp;utm_source=twitter.com&amp;utm_campaign=buffer"/>				<rdf:li resource="http://googleresearch.blogspot.fr/2015/11/tensorflow-googles-latest-machine_9.html?m=1"/>				<rdf:li resource="http://download.tensorflow.org/paper/whitepaper2015.pdf"/>				<rdf:li resource="http://robohub.org/artificial-general-intelligence-that-plays-atari-video-games-how-did-deepmind-do-it/"/>				<rdf:li resource="http://recode.net/2014/05/27/googles-new-self-driving-car-ditches-the-steering-wheel/"/>				<rdf:li resource="http://vancouverdata.blogspot.fr/2012/08/googles-self-driving-cars-are-going-to.html"/>				<rdf:li resource="http://www.lemonde.fr/technologies/article/2013/08/26/google-investit-dans-le-service-de-taxis-uber_3466504_651865.html#"/>				<rdf:li resource="http://www.newyorker.com/online/blogs/newsdesk/2012/11/google-driverless-car-morality.html"/>				<rdf:li resource="http://googleresearch.blogspot.co.uk/2012/05/from-words-to-concepts-and-back.html"/>				<rdf:li resource="http://blog.outer-court.com/archive/2005-05-22-n83.html"/>			</rdf:Seq>		</items>	</channel>		<item 
rdf:about="http://www.semanlink.net/doc/2025/11/introduction_to_agents_%7C_kaggle">		<title>Introduction to Agents | Kaggle</title>		<link>http://www.semanlink.net/doc/2025/11/introduction_to_agents_%7C_kaggle</link>		<dc:date>2025-11-11T13:47:36Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2025/08/github_google_langextract_a_">		<title>GitHub - google/langextract: A Python library for extracting structured information from unstructured text using LLMs with precise source grounding and interactive visualization.</title>		<link>http://www.semanlink.net/doc/2025/08/github_google_langextract_a_</link>		<description>Python library that uses LLMs to extract structured information from unstructured text documents based on user-defined instructions.		</description>		<dc:date>2025-08-13T08:08:30Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2025/07/ettin_suite_sota_paired_encode">		<title>Ettin Suite: SoTA Paired Encoders and Decoders</title>		<link>http://www.semanlink.net/doc/2025/07/ettin_suite_sota_paired_encode</link>		<description>&gt; What would happen if you took the ModernBERT recipe and applied it to a decoder-only model? Turns out, a state-of-the-art decoder language model that beats Llama 3.2 1B and SmolLM2!		</description>		<dc:date>2025-07-18T00:13:03Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2025/06/agent_development_kit">		<title>Agent Development Kit</title>		<link>http://www.semanlink.net/doc/2025/06/agent_development_kit</link>		<dc:date>2025-06-28T13:39:20Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2025/06/google_adk_and_anthropic_mcp_a">		<title>Google ADK and Anthropic MCP: A comparative review</title>		<link>http://www.semanlink.net/doc/2025/06/google_adk_and_anthropic_mcp_a</link>		<description>&gt; ADK and MCP address different, yet complementary aspects of agent development. 
While ADK focuses on agent orchestration, reasoning, and internal structure, MCP simplifies external data access and standardizes how agents communicate with external services.		</description>		<dc:date>2025-06-28T13:34:31Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2025/05/lightonai_reason_moderncolbert_">		<title>lightonai/Reason-ModernColBERT · Hugging Face</title>		<link>http://www.semanlink.net/doc/2025/05/lightonai_reason_moderncolbert_</link>		<dc:date>2025-05-29T13:43:00Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2025/05/2503_05500_eurobert_scaling_">		<title>[2503.05500&#93; EuroBERT: Scaling Multilingual Encoders for European Languages</title>		<link>http://www.semanlink.net/doc/2025/05/2503_05500_eurobert_scaling_</link>		<dc:date>2025-05-13T19:31:53Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2025/05/lighton_releases_gte_moderncolb">		<title>LightOn Releases GTE-ModernColBERT, First State-of-the-Art Late-Interaction Model Trained on PyLate! - LightOn</title>		<link>http://www.semanlink.net/doc/2025/05/lighton_releases_gte_moderncolb</link>		<dc:date>2025-05-01T22:40:52Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2025/02/benjamin_clavie_sur_x_what_i">		<title>Benjamin Clavié sur X : &quot;What if a [MASK&#93; was all you needed?...&quot;</title>		<link>http://www.semanlink.net/doc/2025/02/benjamin_clavie_sur_x_what_i</link>		<dc:date>2025-02-11T00:25:23Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2025/01/matthewberman_sur_x_titans_">		<title>MatthewBerman sur X : &quot;Titans: Learning to Memorize at Test Time&quot;...</title>		<link>http://www.semanlink.net/doc/2025/01/matthewberman_sur_x_titans_</link>		<description>&gt; human-like memory structures to overcome the limits of Transformers, with one &quot;SURPRISING&quot; feature.
&gt;
&gt; - Short-term memory (real-time processing)
&gt; - Long-term memory (retaining key past information)
&gt; - Persistent memory (task-specific baked-in knowledge)

&gt; Titans can learn and adapt during inference (test time), unlike Transformers, which rely on pre-training.		</description>		<dc:date>2025-01-17T13:26:55Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2025/01/jack_morris_sur_x_%F0%9F%93%A2_new_sta">		<title>jack morris sur X : &quot;New state-of-the-art small text embedding model... (cde-small-v2)&quot;</title>		<link>http://www.semanlink.net/doc/2025/01/jack_morris_sur_x_%F0%9F%93%A2_new_sta</link>		<description>(cf. [cde-small-v1&#93;(doc:2024/10/philipp_schmid_sur_x_can_we_) : creating &quot;context-aware&quot; embeddings using neighboring document information)		</description>		<dc:date>2025-01-15T01:41:26Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2025/01/nomic_ai_modernbert_embed_base_">		<title>nomic-ai/modernbert-embed-base · Hugging Face</title>		<link>http://www.semanlink.net/doc/2025/01/nomic_ai_modernbert_embed_base_</link>		<description>&gt; embedding model trained from ModernBERT-base, bringing the new advances of ModernBERT to embeddings!		</description>		<dc:date>2025-01-02T15:59:42Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2024/12/2412_13663_smarter_better_f">		<title>[2412.13663&#93; Smarter, Better, Faster, Longer: A Modern Bidirectional Encoder for Fast, Memory Efficient, and Long Context Finetuning and Inference</title>		<link>http://www.semanlink.net/doc/2024/12/2412_13663_smarter_better_f</link>		<dc:date>2024-12-21T22:45:32Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2024/12/jeremy_howard_sur_x_i_ll_get">		<title>Jeremy Howard sur X : &quot;We trained 2 new models. Like BERT, but modern. ModernBERT. 
Not some hypey GenAI thing, but a proper workhorse model, for retrieval, classification, etc...&quot;</title>		<link>http://www.semanlink.net/doc/2024/12/jeremy_howard_sur_x_i_ll_get</link>		<description>&lt;https://x.com/LightOnIO/status/1869785737832366306&gt;		</description>		<dc:date>2024-12-21T17:13:36Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2024/10/how_to_build_a_custom_text_clas">		<title>How to build a custom text classifier without days of human labeling</title>		<link>http://www.semanlink.net/doc/2024/10/how_to_build_a_custom_text_clas</link>		<description>how to train a classifier with an LLM’s feedback to deploy a small model without days of human labeling.		</description>		<dc:date>2024-10-19T07:37:22Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2024/10/alphafold_3_le_logiciel_phare_">		<title>AlphaFold 3, le logiciel phare de DeepMind pour modéliser les protéines, frustre les chercheurs</title>		<link>http://www.semanlink.net/doc/2024/10/alphafold_3_le_logiciel_phare_</link>		<dc:date>2024-10-09T14:26:18Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2024/10/le_prix_nobel_de_chimie_2024_es">		<title>Le prix Nobel de chimie 2024 est attribué à David Baker, Demis Hassabis et John Jumper pour leurs travaux sur les protéines</title>		<link>http://www.semanlink.net/doc/2024/10/le_prix_nobel_de_chimie_2024_es</link>		<dc:date>2024-10-09T14:25:21Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2024/08/rohan_paul_sur_x_rag_or_long">		<title>Rohan Paul sur X : &quot;RAG or Long Context ?? 
...&quot;</title>		<link>http://www.semanlink.net/doc/2024/08/rohan_paul_sur_x_rag_or_long</link>		<dc:date>2024-08-24T09:56:13Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2024/05/2405_05904_does_fine_tuning_l">		<title>[2405.05904&#93; Does Fine-Tuning LLMs on New Knowledge Encourage Hallucinations?</title>		<link>http://www.semanlink.net/doc/2024/05/2405_05904_does_fine_tuning_l</link>		<description>&gt; our results highlight the risk in introducing new factual knowledge through fine-tuning, and support the view that large language models mostly acquire factual knowledge through pre-training, whereas fine-tuning teaches them to use it more efficiently		</description>		<dc:date>2024-05-22T18:02:12Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2024/04/llamaindex_%F0%9F%A6%99_sur_x_fine_tun">		<title>LlamaIndex 🦙 sur X : &quot;Fine-tuning Embedding Models for RAG with LoRA&apos;</title>		<link>http://www.semanlink.net/doc/2024/04/llamaindex_%F0%9F%A6%99_sur_x_fine_tun</link>		<dc:date>2024-04-23T23:20:13Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2024/04/2404_11018_many_shot_in_conte">		<title>[2404.11018&#93; Many-Shot In-Context Learning</title>		<link>http://www.semanlink.net/doc/2024/04/2404_11018_many_shot_in_conte</link>		<dc:date>2024-04-21T13:25:46Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2024/02/2307_15936_a_theory_for_emerg">		<title>[2307.15936&#93; A Theory for Emergence of Complex Skills in Language Models</title>		<link>http://www.semanlink.net/doc/2024/02/2307_15936_a_theory_for_emerg</link>		<description>[New Theory Suggests Chatbots Can Understand Text | Quanta Magazine&#93;(doc:2024/02/new_theory_suggests_chatbots_ca)		</description>		<dc:date>2024-02-24T00:11:29Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2024/02/jeff_dean_%F0%9F%8F%A1_sur_x_gemini">		<title>Jeff Dean (@🏡) sur X : &quot;Gemini 1.5 Pro - A 
highly capable multimodal model with a 10M token context length...&quot;</title>		<link>http://www.semanlink.net/doc/2024/02/jeff_dean_%F0%9F%8F%A1_sur_x_gemini</link>		<dc:date>2024-02-15T22:26:23Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2024/01/an_efficient_long_text_semantic">		<title>An efficient long-text semantic retrieval approach via utilizing presentation learning on short-text | Complex &amp; Intelligent Systems (2023)</title>		<link>http://www.semanlink.net/doc/2024/01/an_efficient_long_text_semantic</link>		<description>long-text retrieval model based on BERT (called LTR-BERT)		</description>		<dc:date>2024-01-31T17:59:13Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2024/01/rachit_bansal_sur_x_extendin">		<title>Rachit Bansal sur X : &quot;An LLM can be efficiently *composed* with specialized (L)LMs to enable new tasks&quot;</title>		<link>http://www.semanlink.net/doc/2024/01/rachit_bansal_sur_x_extendin</link>		<description>[[2401.02412&#93; LLM Augmented LLMs: Expanding Capabilities through Composition&#93;(doc:2024/01/2401_02412_llm_augmented_llms)

&gt; CALM—Composition to Augment Language Models:
&gt; 1. Scales up LLMs on new tasks by *re-using* existing (L)LMs w/ very few new parameters &amp; data,
&gt; 2. Keeps existing model weights intact, hence **preserves original capabilities**,
&gt; 3. Applies to diverse domains and settings.

&gt; Rather than a shallow combination, CALM introduces a small set of cross-attention parameters over models’ layer representations.

Use-case example, Multilinguality:

&gt; We reuse an LM trained on a bunch of low-resource languages (LRLs)
w/ an LLM that has never seen some of these LRLs.
		</description>		<dc:date>2024-01-06T12:07:15Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2024/01/maarten_grootendorst_sur_x_b">		<title>Maarten Grootendorst sur X : &quot;BERTopic + LLMs + DataMapPlot&quot;</title>		<link>http://www.semanlink.net/doc/2024/01/maarten_grootendorst_sur_x_b</link>		<dc:date>2024-01-06T09:57:10Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/12/ukp_lab_sur_x_need_a_lightwe">		<title>UKP Lab sur X : &quot;a lightweight solution for few-shot domain-specific sentence classification: AdaSent!...&quot;</title>		<link>http://www.semanlink.net/doc/2023/12/ukp_lab_sur_x_need_a_lightwe</link>		<description>AdaSent is an approach to creating domain-specialized sentence encoders for few-shot sentence classification

&gt; Reusable general sentence adapter across domains

&gt; AdaSent decouples DAPT (Domain-Adaptive Pre-Training) &amp; SEPT (Sentence Embedding Pre-Training) **by storing the sentence encoding abilities into an adapter**, which is trained only once in the general domain and plugged into various DAPT-ed PLMs

[Github&#93;(https://github.com/UKPLab/AdaSent)		</description>		<dc:date>2023-12-09T19:40:21Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/10/rethinking_query_expansion_for_">		<title>Rethinking Query Expansion for BERT Reranking | Advances in Information Retrieval (2020)</title>		<link>http://www.semanlink.net/doc/2023/10/rethinking_query_expansion_for_</link>		<description>using BERT for Information Retrieval:
&gt; We find that traditional word-based query expansion is not entirely applicable		</description>		<dc:date>2023-10-29T09:05:11Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/09/maarten_grootendorst_sur_x_i">		<title>Maarten Grootendorst sur X : &quot;Introducing KeyLLM. An extension to KeyBERT that can create, extract, and fine-tune keywords using Large Language Models!</title>		<link>http://www.semanlink.net/doc/2023/09/maarten_grootendorst_sur_x_i</link>		<dc:date>2023-09-30T14:26:24Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/09/getting_started_with_deepmatche">		<title>Getting started with DeepMatcher.ipynb - Colaboratory</title>		<link>http://www.semanlink.net/doc/2023/09/getting_started_with_deepmatche</link>		<dc:date>2023-09-20T08:37:26Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/08/2002_06275_twinbert_distilli">		<title>[2002.06275&#93; TwinBERT: Distilling Knowledge to Twin-Structured BERT Models for Efficient Retrieval</title>		<link>http://www.semanlink.net/doc/2023/08/2002_06275_twinbert_distilli</link>		<dc:date>2023-08-27T11:40:00Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/08/modular_and_parameter_efficient">		<title>Modular and Parameter-Efficient Fine-Tuning for NLP Models</title>		<link>http://www.semanlink.net/doc/2023/08/modular_and_parameter_efficient</link>		<dc:date>2023-08-08T09:16:37Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/07/sparseembed_learning_sparse_le">		<title>SparseEmbed: Learning Sparse Lexical Representations with Contextual Embeddings for Retrieval</title>		<link>http://www.semanlink.net/doc/2023/07/sparseembed_learning_sparse_le</link>		<description>retrieval model that learns sparse lexical representations with contextual embeddings

&gt; we **combine the strengths of both the sparse and dense representations** for first-stage retrieval.
&gt;
&gt; Compared
with [SPLADE&#93;(tag:splade), our model leverages the contextual embeddings
to improve model expressiveness. Compared with [ColBERT&#93;(tag:colbert),
our sparse representations are trained end-to-end to optimize both
efficiency and effectiveness.		</description>		<dc:date>2023-07-26T23:36:33Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/07/2305_14128_dr_icl_demonstrat">		<title>[2305.14128&#93; Dr.ICL: Demonstration-Retrieved In-context Learning</title>		<link>http://www.semanlink.net/doc/2023/07/2305_14128_dr_icl_demonstrat</link>		<description>&gt; While early studies primarily used a fixed or random set of demonstrations for all test queries, recent research suggests that retrieving semantically similar demonstrations to the input from a pool of available demonstrations results in better performance. This work expands the applicability of retrieval-based ICL approaches by demonstrating that even simple word-overlap similarity measures such as BM25 outperform randomly selected demonstrations.		</description>		<dc:date>2023-07-14T12:25:23Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/06/generative_ai_support_on_vertex">		<title>Generative AI support on Vertex AI generally available | Google Cloud Blog</title>		<link>http://www.semanlink.net/doc/2023/06/generative_ai_support_on_vertex</link>		<dc:date>2023-06-09T08:21:29Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/06/daniel_daza_sur_twitter_new_">		<title>Daniel Daza sur Twitter : &quot;BioBLP, a method for learning embeddings on multimodal knowledge graphs....&quot;</title>		<link>http://www.semanlink.net/doc/2023/06/daniel_daza_sur_twitter_new_</link>		<dc:date>2023-06-07T23:35:23Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/05/2305_11778_cross_lingual_supe">		<title>[2305.11778&#93; Cross-Lingual Supervision improves Large Language Models Pre-training</title>		<link>http://www.semanlink.net/doc/2023/05/2305_11778_cross_lingual_supe</link>		<description>&gt; We demonstrate that pre-training Large Language Models on a mixture of a self-supervised Language Modeling objective and the supervised Machine 
Translation objective, therefore including cross-lingual parallel data during pre-training, yields models with better in-context learning abilities.		</description>		<dc:date>2023-05-22T08:13:33Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/05/peter_j_liu_sur_twitter_her">		<title>Peter J. Liu sur Twitter : &quot;RLHF-alternative without RL&quot; </title>		<link>http://www.semanlink.net/doc/2023/05/peter_j_liu_sur_twitter_her</link>		<description>&gt; TL;DR: Works as well as RLHF, but a lot simpler. About as easy and efficient as fine-tuning. Much better than simply fine-tuning on good examples.		</description>		<dc:date>2023-05-18T09:53:46Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/05/2305_06897_afriqa_cross_ling">		<title>[2305.06897&#93; AfriQA: Cross-lingual Open-Retrieval Question Answering for African Languages</title>		<link>http://www.semanlink.net/doc/2023/05/2305_06897_afriqa_cross_ling</link>		<description>[Twitter&#93;(https://twitter.com/j___y_t/status/1657392003666128896)		</description>		<dc:date>2023-05-15T15:51:16Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/05/google_ai_palm_2_google_ai">		<title>Google AI PaLM 2 – Google AI</title>		<link>http://www.semanlink.net/doc/2023/05/google_ai_palm_2_google_ai</link>		<dc:date>2023-05-15T09:11:10Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/05/google_teases_project_tailwind_">		<title>Google teases Project Tailwind — a prototype AI notebook that learns from your documents - The Verge</title>		<link>http://www.semanlink.net/doc/2023/05/google_teases_project_tailwind_</link>		<dc:date>2023-05-14T10:43:45Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/05/skeskinen_bert_cpp_ggml_implem">		<title>skeskinen/bert.cpp: ggml implementation of BERT</title>		<link>http://www.semanlink.net/doc/2023/05/skeskinen_bert_cpp_ggml_implem</link>		<description>&gt; 
ggml inference of BERT neural net architecture with pooling and normalization from SentenceTransformers (sbert.net). High quality sentence embeddings in pure C++ (with C API). 
&gt;
&gt; The main goal of bert.cpp is to run the BERT model using **4-bit integer quantization on CPU**		</description>		<dc:date>2023-05-09T00:29:27Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/05/document_ai_%7C_google_for_deve">		<title>Document AI  |  Google for Developers - Software Development Guides, Tools &amp; More  |  Google Developers</title>		<link>http://www.semanlink.net/doc/2023/05/document_ai_%7C_google_for_deve</link>		<dc:date>2023-05-09T00:07:28Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/05/niels_rogge_sur_twitter_made">		<title>Niels Rogge sur Twitter : &quot;Made some new demo notebooks! - fine-tune @MetaAI&apos;s SAM and @GoogleAI&apos;s Pix2Struct on custom data&quot;</title>		<link>http://www.semanlink.net/doc/2023/05/niels_rogge_sur_twitter_made</link>		<dc:date>2023-05-09T00:00:25Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/05/google_we_have_no_moat_and_ne">		<title>Google &quot;We Have No Moat, And Neither Does OpenAI&quot;</title>		<link>http://www.semanlink.net/doc/2023/05/google_we_have_no_moat_and_ne</link>		<description>&gt; low-cost public involvement was enabled
by a vastly cheaper mechanism for fine tuning called low
rank adaptation ([LoRA&#93;(tag:lora))

&gt; **Part of what makes LoRA so effective is that ... it’s stackable.**
&gt;
&gt; By contrast, training giant models from scratch not only
throws away the pretraining, but also any iterative
improvements that have been made on top.

&gt; LoRA updates are very cheap to produce (~$100) for the
most popular model sizes.

&gt; Many of these projects are saving time by training on
small, highly curated datasets...
&gt; These
datasets are built using synthetic methods (e.g. filtering
the best responses from an existing model) and
scavenging from other projects

&gt; Directly Competing With Open Source
Is a Losing Proposition

&gt; Paradoxically, the one clear winner in all of this is Meta.
Because the leaked model was theirs ([LLaMA&#93;(tag:llama)), they have
effectively garnered an entire planet&apos;s worth of free labor.
Since most open source innovation is happening on top of
their architecture, there is nothing stopping them from
directly incorporating it into their products.		</description>		<dc:date>2023-05-04T21:46:16Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/04/aran_komatsuzaki_sur_twitter__3">		<title>Aran Komatsuzaki sur Twitter : &quot;JaxPruner: A concise library for sparsity research An open-source JAX-based pruning and sparse training library for machine learning research repo&quot;</title>		<link>http://www.semanlink.net/doc/2023/04/aran_komatsuzaki_sur_twitter__3</link>		<dc:date>2023-04-28T07:58:57Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/04/2303_16839_mammut_a_simple_a">		<title>[2303.16839&#93; MaMMUT: A Simple Architecture for Joint Learning for MultiModal Tasks</title>		<link>http://www.semanlink.net/doc/2023/04/2303_16839_mammut_a_simple_a</link>		<description>The development of language models have moved from encoder-decoder to decoder-only designs. In addition, the common knowledge has it that the two most popular multimodal tasks, the generative and contrastive tasks, tend to conflict with one another, are hard to accommodate in one architecture, and further need complex adaptations for downstream tasks. We propose a novel paradigm of training with a decoder-only model for multimodal tasks

Related work: [CLIP: Connecting Text and Images&#93;(doc:2021/01/clip_connecting_text_and_images)		</description>		<dc:date>2023-04-25T00:33:41Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/04/domain_adaptation_with_generati">		<title>Domain Adaptation with Generative Pseudo-Labeling (GPL) | Pinecone</title>		<link>http://www.semanlink.net/doc/2023/04/domain_adaptation_with_generati</link>		<dc:date>2023-04-09T10:30:34Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/04/classifying_long_textual_docume">		<title>Classifying long textual documents (up to 25 000 tokens) using BERT | by Sinequa | (2020)</title>		<link>http://www.semanlink.net/doc/2023/04/classifying_long_textual_docume</link>		<description>&gt; long text +  additional textual metadata (such as title, abstract …) and categories (location, authors …).
		</description>		<dc:date>2023-04-07T11:37:12Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/04/diffusion_language_models_san">		<title>Diffusion language models – Sander Dieleman</title>		<link>http://www.semanlink.net/doc/2023/04/diffusion_language_models_san</link>		<description>&gt; Diffusion models have completely taken over generative modelling of perceptual signals -- why is autoregression still the name of the game for language modelling? And can we do anything about that?		</description>		<dc:date>2023-04-06T08:23:59Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/04/daniel_vila_suero_sur_twitter_">		<title>Daniel Vila Suero sur Twitter : &quot;Data quality is key for LLMs, but we&apos;re building Open Source LLMs with data of &quot;unknown&quot; quality... Introducing Alpaca GarbageCollector...&quot;</title>		<link>http://www.semanlink.net/doc/2023/04/daniel_vila_suero_sur_twitter_</link>		<description>&gt; a cross-lingual SetFit model to identify potential bad instructions in Alpaca-like datasets		</description>		<dc:date>2023-04-05T18:37:29Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/04/2304_01982_rethinking_the_rol">		<title>[2304.01982&#93; Rethinking the Role of Token Retrieval in Multi-Vector Retrieval</title>		<link>http://www.semanlink.net/doc/2023/04/2304_01982_rethinking_the_rol</link>		<description>&gt; Multi-vector retrievers like [ColBERT&#93;(tag:colbert) are powerful, but they come at the cost of complicated inference. In this paper, we ask: &quot;can token retrieval alone achieve great performance in multi-vector retrieval?&quot; [tweet&#93;(https://twitter.com/leejnhk/status/1643632578824396805?s=20)

&gt; The key insight of XTR is that the
token-retrieval in multi-vector models should be **trained to retrieve the most salient and informative
document tokens**, so that the score between a query and document can be computed using only the
retrieved information, just like how single-vector retrieval models work

&gt; This is an *amazing* way to re-engineer the scoring mechanism of late interaction / ColBERT retrievers! [src: ColBERT&apos;s author Omar Khattab&#93;(https://twitter.com/lateinteraction/status/1643439889902637056?s=20)

- scoring using only retrieved document terms
- imputing missing token scores using their upper bound		</description>		<dc:date>2023-04-05T08:33:18Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/03/niels_rogge_sur_twitter_exci">		<title>Niels Rogge sur Twitter : &quot;@GoogleAI&apos;s Pix2Struct now available in 🤗 Transformers!&quot;</title>		<link>http://www.semanlink.net/doc/2023/03/niels_rogge_sur_twitter_exci</link>		<description>&gt; A Transformer (vision encoder, language decoder). No OCR involved!. Pre-trained in a self-supervised fashion by predicting HTML based on masked portions of web page images.

&gt; Pix2Struct has been fine tuned on a variety of tasks and datasets, ranging from image captioning, visual question answering (VQA) over different inputs (books, charts, science diagrams), captioning UI components etc. ... We therefore advise you to use these models for the tasks they have been fine tuned on.

&gt; very similar to GPT-4&apos;s visual abilities, but open-source ;)		</description>		<dc:date>2023-03-27T23:15:25Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/03/enabling_python_virtualenv_in_j">		<title>Enabling Python VirtualEnv in JupyterLab | My Shitty Code</title>		<link>http://www.semanlink.net/doc/2023/03/enabling_python_virtualenv_in_j</link>		<dc:date>2023-03-08T13:59:47Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/02/2112_05682_self_attention_doe">		<title>[2112.05682&#93; Self-attention Does Not Need O(n^2) Memory</title>		<link>http://www.semanlink.net/doc/2023/02/2112_05682_self_attention_doe</link>		<dc:date>2023-02-27T12:58:02Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/02/2108_08877_sentence_t5_scala">		<title>[2108.08877&#93; Sentence-T5: Scalable Sentence Encoders from Pre-trained Text-to-Text Models</title>		<link>http://www.semanlink.net/doc/2023/02/2108_08877_sentence_t5_scala</link>		<dc:date>2023-02-17T18:20:47Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/02/maarten_grootendorst_sur_twitte">		<title>Maarten Grootendorst sur Twitter : &quot;The v0.14 release of BERTopic is here. Fine-tune your topic keywords and labels with models from @OpenAI, @huggingface, @CohereAI, @spacy_io, and @LangChainAI... An overview thread&quot;</title>		<link>http://www.semanlink.net/doc/2023/02/maarten_grootendorst_sur_twitte</link>		<dc:date>2023-02-15T13:56:16Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/02/jim_fan_sur_twitter_do_you_k">		<title>Jim Fan sur Twitter : &quot;Do you know that DeepMind has actually open-sourced the heart of AlphaGo &amp; AlphaZero?... 
&quot;</title>		<link>http://www.semanlink.net/doc/2023/02/jim_fan_sur_twitter_do_you_k</link>		<dc:date>2023-02-15T10:20:43Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/02/guiding_frozen_language_models_">		<title>Guiding Frozen Language Models with Learned Soft Prompts – Google AI Blog</title>		<link>http://www.semanlink.net/doc/2023/02/guiding_frozen_language_models_</link>		<dc:date>2023-02-14T10:42:51Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/02/2203_14465_star_bootstrappin">		<title>[2203.14465&#93; STaR: Bootstrapping Reasoning With Reasoning</title>		<link>http://www.semanlink.net/doc/2023/02/2203_14465_star_bootstrappin</link>		<description>&quot;Self-Taught Reasoner&quot; (STaR)
&gt; (to our knowledge) the first technique to allow a pre-trained large
language model to iteratively use its language modeling capacity to improve itself

&gt; Generating step-by-step &quot;chain-of-thought&quot; rationales improves language model
performance on complex reasoning tasks like mathematics or commonsense
question-answering. However, inducing language model rationale generation currently
requires either constructing massive rationale datasets or sacrificing accuracy
by using only few-shot inference. We propose **a technique to iteratively leverage a
small number of rationale examples and a large dataset without rationales**, to bootstrap
the ability to perform successively more complex reasoning.


		</description>		<dc:date>2023-02-07T16:40:38Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/02/google_announces_chatgpt_rival_">		<title>Google announces ChatGPT rival Bard, with wider availability in ‘coming weeks’ - The Verge</title>		<link>http://www.semanlink.net/doc/2023/02/google_announces_chatgpt_rival_</link>		<dc:date>2023-02-07T08:03:58Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/02/ramsri_goutham_golla_sur_twitte">		<title>Ramsri Goutham Golla sur Twitter : &quot;The most practical open-source competitor to @OpenAI &apos;s GPT-3 is Google&apos;s Flan-T5 Here are 5 Flan-T5 resources to try out easily, deploy, or fine-tune it! 🧵&quot; / Twitter</title>		<link>http://www.semanlink.net/doc/2023/02/ramsri_goutham_golla_sur_twitte</link>		<dc:date>2023-02-04T02:04:59Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/02/the_flan_collection_advancing_">		<title>The Flan Collection: Advancing open source methods for instruction tuning – Google AI Blog</title>		<link>http://www.semanlink.net/doc/2023/02/the_flan_collection_advancing_</link>		<description>&gt; The ability to reason on new tasks is mostly credited to training models on a wide variety of unique instructions, known as “instruction tuning”, which was introduced by FLAN and extended in T0, Super-Natural Instructions, MetaICL, and InstructGPT.		</description>		<dc:date>2023-02-02T09:14:36Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/02/shayne_longpre_sur_twitter_w">		<title>Shayne Longpre sur Twitter : &quot;What’s the best completely public competitor to #ChatGPT? 
Flan-T5 beats all public models we tested...&quot;</title>		<link>http://www.semanlink.net/doc/2023/02/shayne_longpre_sur_twitter_w</link>		<description>&gt; It&apos;s promising these results don&apos;t use any [#RLHF&#93;(tag:reinforcement_learning_from_human_feedback) data, or human &quot;alignment&quot;, which is expensive to collect and less publicly available.

&gt; Key takeaway: finetuning Flan-T5 is better and more compute-efficient than finetuning T5.[src&#93;(https://twitter.com/_jasonwei/status/1620864198262804481?s=20&amp;t=hMXLCdqcOFAEbjsfwc_yog)		</description>		<dc:date>2023-02-01T18:29:11Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/01/creer_un_notebook_jupyterlab_ve">		<title>Créer un notebook JupyterLab Vertex AI  |  Google Cloud</title>		<link>http://www.semanlink.net/doc/2023/01/creer_un_notebook_jupyterlab_ve</link>		<dc:date>2023-01-31T01:54:30Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/01/5_steps_to_go_from_a_notebook_t">		<title>5 steps to go from a notebook to a deployed model — The TensorFlow Blog</title>		<link>http://www.semanlink.net/doc/2023/01/5_steps_to_go_from_a_notebook_t</link>		<description>&gt; how to get from notebook experimentation to deployment in the cloud

**notebook execution feature**: run the notebook cell by cell on the Vertex AI managed training service. When you launch the training job, it’s going to run on a machine you won’t have access to after the job completes -&gt; have to save to a bucket

Launch the execution: Select the Execute button, give your execution a name, **then add a GPU**.

&gt; Now you know how to quickly launch serverless training jobs on Google Cloud

- **Deploy to an endpoint** 
- or use the **batch prediction feature** (if your use case does not require low latency predictions)

Get predictions

&gt; Now that this model is deployed to an endpoint, you can hit it like any other REST endpoint		</description>		<dc:date>2023-01-31T01:45:52Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/01/lamda_our_breakthrough_convers">		<title>LaMDA: our breakthrough conversation technology</title>		<link>http://www.semanlink.net/doc/2023/01/lamda_our_breakthrough_convers</link>		<dc:date>2023-01-28T15:20:18Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/01/an_empirical_analysis_of_comput">		<title>An empirical analysis of compute-optimal large language model training</title>		<link>http://www.semanlink.net/doc/2023/01/an_empirical_analysis_of_comput</link>		<description>&gt; the current large language models are far too large for their compute budget and are not being trained on enough data.		</description>		<dc:date>2023-01-26T23:33:11Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/01/characterizing_emergent_phenome">		<title>Characterizing Emergent Phenomena in Large Language Models – Google AI Blog</title>		<link>http://www.semanlink.net/doc/2023/01/characterizing_emergent_phenome</link>		<description>[Tweet&#93;(https://twitter.com/_jasonwei/status/1618331876623523844?s=20&amp;t=sMbTCnu16Od8vGBmo0x6ig)

&gt; unpredictable phenomenon that we refer to as emergent abilities of large language models. We consider an ability to be emergent if it is not present in smaller models but is present in larger models. Thus, emergent abilities cannot be predicted simply by extrapolating the performance of smaller models. The existence of such emergence implies that additional scaling could further expand the range of capabilities of language models.		</description>		<dc:date>2023-01-26T09:28:43Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/01/2301_08210_everything_is_conn">		<title>[2301.08210&#93; Everything is Connected: Graph Neural Networks</title>		<link>http://www.semanlink.net/doc/2023/01/2301_08210_everything_is_conn</link>		<description>&gt; **it is likely that the very cognition processes driving our reasoning and
decision-making are, in some sense, graph-structured.** That is, paraphrasing a quote
from Forrester (1971), nobody really imagines in their head all the information known
to them; rather, they imagine only selected concepts, and relationships between them,
and use those to represent the real system.

(yep, that&apos;s why I made semanlink)

&gt; Transformers are themselves a special case of GNNs		</description>		<dc:date>2023-01-21T14:01:42Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/01/multilingual_sentence_transform">		<title>Multilingual Sentence Transformers | Pinecone</title>		<link>http://www.semanlink.net/doc/2023/01/multilingual_sentence_transform</link>		<description>Focus on **Multilingual Knowledge Distillation**

&gt; recent method introduced by Nils Reimers and Iryna Gurevych in 2020
&gt; The teacher model is an already fine-tuned sentence transformer used for creating embeddings in a single language (most likely English). The student model is a transformer that has been pretrained on a multilingual corpus.		</description>		<dc:date>2023-01-13T01:45:12Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/01/alphafold%E2%80%99s_new_rival_meta_ai_">		<title>AlphaFold’s new rival? Meta AI predicts shape of 600 million proteins</title>		<link>http://www.semanlink.net/doc/2023/01/alphafold%E2%80%99s_new_rival_meta_ai_</link>		<dc:date>2023-01-11T19:26:06Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2023/01/andrej_karpathy_sur_twitter_">		<title>Andrej Karpathy sur Twitter : &quot;Great post (5mo ago) &quot;chinchilla&apos;s wild implications&quot; giving context to LLM goldrush shifting from model size to dataset size...&quot;</title>		<link>http://www.semanlink.net/doc/2023/01/andrej_karpathy_sur_twitter_</link>		<dc:date>2023-01-05T00:53:48Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/12/rohan_anil_sur_twitter_next_">		<title>Rohan Anil sur Twitter : &quot;Next big jump with Neural Network performance is going to happen when community embraces non-uniformity</title>		<link>http://www.semanlink.net/doc/2022/12/rohan_anil_sur_twitter_next_</link>		<dc:date>2022-12-18T10:02:21Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/12/valueerror_invalid_literal_for">		<title>ValueError &quot;invalid literal for int() with base 10&quot; in trainer.evaluate (dataset created from pandas) · Issue #228 · huggingface/setfit</title>		<link>http://www.semanlink.net/doc/2022/12/valueerror_invalid_literal_for</link>		<description>see &lt;https://github.com/huggingface/setfit/blob/main/notebooks/zero-shot-classification.ipynb&gt;
&gt; Note: some datasets on the Hugging Face Hub don&apos;t have a ClassLabel feature for the label column. In these cases, you should compute the candidate labels manually by first computing the id2label mapping as follows:		</description>		<dc:date>2022-12-13T11:46:14Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/11/few_shot_text_classification_c">		<title>Few-Shot Text Classification (Cloudera 2020)</title>		<link>http://www.semanlink.net/doc/2022/11/few_shot_text_classification_c</link>		<description>&gt; Sentence-BERT has been optimized… well, for sentences! It’s reasonable to suspect that SBERT’s representations of single words or short phrases like “Business” or “Science &amp; Technology” won’t be as semantically relevant as representations derived from a word-level method, like word2vec or GloVe		</description>		<dc:date>2022-11-24T14:16:39Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/11/one_of_the_biggest_problems_in_">		<title>One of the Biggest Problems in Biology Has Finally Been Solved - Scientific American</title>		<link>http://www.semanlink.net/doc/2022/11/one_of_the_biggest_problems_in_</link>		<description>Google DeepMind CEO Demis Hassabis explains how its AlphaFold AI program predicted the 3-D structure of every known protein		</description>		<dc:date>2022-11-01T09:45:44Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/10/2202_06991_transformer_memory">		<title>[2202.06991&#93; Transformer Memory as a Differentiable Search Index</title>		<link>http://www.semanlink.net/doc/2022/10/2202_06991_transformer_memory</link>		<description>&gt; In this paper, we demonstrate that information retrieval can be accomplished with a single Transformer, in which all information about the corpus is encoded in the parameters of the model. 
To this end, we introduce the Differentiable Search Index (DSI), a new paradigm that learns a text-to-text model that maps string queries directly to relevant docids; in other words, a DSI model answers queries directly using only its parameters, dramatically simplifying the whole retrieval process.		</description>		<dc:date>2022-10-25T00:04:06Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/10/tutorial_on_uncertainty_estimat">		<title>Tutorial on Uncertainty Estimation for NLP</title>		<link>http://www.semanlink.net/doc/2022/10/tutorial_on_uncertainty_estimat</link>		<dc:date>2022-10-18T15:02:39Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/10/stephanie_chan_sur_twitter_t">		<title>Stephanie Chan sur Twitter : &quot;Transformer inductive biases...&quot;</title>		<link>http://www.semanlink.net/doc/2022/10/stephanie_chan_sur_twitter_t</link>		<description>&gt; Transformers generalize differently from information stored in:
&gt;
&gt;- weights - mostly &quot;rule-based&quot;
&gt;- context - mostly &quot;exemplar-based&quot;
&gt;
&gt;This effect depends on (a) the training data (b) the size of the transformer		</description>		<dc:date>2022-10-14T15:49:40Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/10/lewis_tunstall_sur_twitter_t">		<title>Lewis Tunstall sur Twitter : &quot;The SetFit library for few-shot learning with Sentence Transformers now supports *multi-label text classification*...&quot;</title>		<link>http://www.semanlink.net/doc/2022/10/lewis_tunstall_sur_twitter_t</link>		<description>Multilabel support [github issue&#93;(https://github.com/huggingface/setfit/issues/65)		</description>		<dc:date>2022-10-14T15:24:53Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/10/huggingface_setfit_efficient_f">		<title>huggingface/setfit: Efficient few-shot learning with Sentence Transformers</title>		<link>http://www.semanlink.net/doc/2022/10/huggingface_setfit_efficient_f</link>		<dc:date>2022-10-12T23:41:16Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/10/santiago_sur_twitter_if_you_">		<title>Santiago sur Twitter : &quot;If you have an Apple M1 or M2 and don&apos;t take advantage of its GPU, I&apos;m about to change your life...&quot;</title>		<link>http://www.semanlink.net/doc/2022/10/santiago_sur_twitter_if_you_</link>		<description>&gt; These instructions allow TensorFlow to use your GPU		</description>		<dc:date>2022-10-07T19:33:41Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/10/maartengr_keybert_minimal_keyw">		<title>MaartenGr/KeyBERT: Minimal keyword extraction with BERT</title>		<link>http://www.semanlink.net/doc/2022/10/maartengr_keybert_minimal_keyw</link>		<dc:date>2022-10-06T14:37:52Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/10/yi_tay_sur_twitter_don_t_ret">		<title>Yi Tay sur Twitter : &quot;Don&apos;t retrieve, recite!...&quot;</title>		<link>http://www.semanlink.net/doc/2022/10/yi_tay_sur_twitter_don_t_ret</link>		
<description>&gt; Introducing Recitation-Augmented Language models &quot;RECITE&quot; from @GoogleAI		</description>		<dc:date>2022-10-06T01:47:13Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/09/2205_11498_domain_adaptation_">		<title>[2205.11498&#93; Domain Adaptation for Memory-Efficient Dense Retrieval</title>		<link>http://www.semanlink.net/doc/2022/09/2205_11498_domain_adaptation_</link>		<description>Refers to [Binary Passage Retriever (BPR)&#93;(doc:2021/06/2106_00882_efficient_passage_)		</description>		<dc:date>2022-09-26T17:46:39Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/09/2209_11055_efficient_few_shot">		<title>[2209.11055&#93; Efficient Few-Shot Learning Without Prompts</title>		<link>http://www.semanlink.net/doc/2022/09/2209_11055_efficient_few_shot</link>		<description>[tweet&#93;(https://twitter.com/_akhaliq/status/1573109469646561280?s=20&amp;t=RTpK9dh90az0zT1Xg2ohpQ):

&gt; So if I have 4 classes and say 2 labels per class, I would first fine tune an ST on these 4 pairs and then vectorize the 8 total examples for fine-tuning the classifier		</description>		<dc:date>2022-09-23T10:26:46Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/09/google_ai_blog_tensorstore_for">		<title>Google AI Blog: TensorStore for High-Performance, Scalable Array Storage</title>		<link>http://www.semanlink.net/doc/2022/09/google_ai_blog_tensorstore_for</link>		<description>Use Case: 3D Brain Mapping		</description>		<dc:date>2022-09-23T02:24:43Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/09/promptbert_improving_bert_sente">		<title>PromptBERT improving BERT sentence embeddings with prompts - Ethan Kim</title>		<link>http://www.semanlink.net/doc/2022/09/promptbert_improving_bert_sente</link>		<dc:date>2022-09-16T10:31:11Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/09/2201_04337_promptbert_improv">		<title>[2201.04337&#93; PromptBERT: Improving BERT Sentence Embeddings with Prompts</title>		<link>http://www.semanlink.net/doc/2022/09/2201_04337_promptbert_improv</link>		<description>[PromptBERT improving BERT sentence embeddings with prompts - Ethan Kim&#93;(doc:2022/09/promptbert_improving_bert_sente)		</description>		<dc:date>2022-09-16T10:06:59Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/09/prompt_tuning_bert%F0%9F%8E%AF_commonlit_">		<title>Prompt Tuning BERT🎯:CommonLit Readability | Kaggle</title>		<link>http://www.semanlink.net/doc/2022/09/prompt_tuning_bert%F0%9F%8E%AF_commonlit_</link>		<description>&gt; Prompt-tuning is a simple yet effective mechanism for learning “soft prompts” to condition frozen language models to perform specific downstream tasks.Soft prompts are learned through backpropagation and can be tuned to incorporate signal from any number of labeled examples. 
Finally, we show that conditioning a frozen model with soft prompts confers benefits in robustness to domain transfer, as compared to full model tuning.		</description>		<dc:date>2022-09-16T09:49:38Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/09/active_learning_for_bert_an_em">		<title>Active Learning for BERT: An Empirical Study - ACL Anthology</title>		<link>http://www.semanlink.net/doc/2022/09/active_learning_for_bert_an_em</link>		<description>&gt; The use of Active Learning (AL)
with deep pre-trained models has so far received
little consideration.
&gt;
&gt; We study the
potential of (i) various AL strategies; (ii) in conjunction
with BERT, (iii) within a highly challenging
– yet common – real-world scenario of
class imbalance and scarce labeled data.

focused on binary classification

&gt; AL can boost BERT performance, especially in the most realistic scenario in which the initial set of labeled examples is created using keyword-based queries, resulting in a biased sample of the minority class. 

[Github&#93;(https://github.com/IBM/low-resource-text-classification-framework)		</description>		<dc:date>2022-09-02T16:08:49Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/09/2106_10199_bitfit_simple_par">		<title>[2106.10199&#93; BitFit: Simple Parameter-efficient Fine-tuning for Transformer-based Masked Language-models</title>		<link>http://www.semanlink.net/doc/2022/09/2106_10199_bitfit_simple_par</link>		<description>&gt;  BitFit, a sparse-finetuning method where only the bias-terms of the model (or a subset of them) are being modified. We show that **with small-to-medium training data, applying BitFit on pre-trained BERT models is competitive with (and sometimes better than) fine-tuning the entire model.** 

&gt; **these findings support the hypothesis that finetuning is mainly about exposing knowledge induced by language-modeling training, rather than learning new task-specific linguistic knowledge** -- ???!!!



&gt; The focus on modifying a small
group of parameters eases deployment, as the vast
majority of the parameters of the model are shared
between various NLP tasks

[GitHub&#93;(https://github.com/benzakenelad/BitFit)		</description>		<dc:date>2022-09-01T17:20:28Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/08/on_stability_of_few_sample_tran">		<title>On Stability of Few-Sample Transformer Fine-Tuning | Kaggle</title>		<link>http://www.semanlink.net/doc/2022/08/on_stability_of_few_sample_tran</link>		<description>[[2006.05987&#93; Revisiting Few-sample BERT Fine-tuning&#93;(doc:2022/03/2006_05987_revisiting_few_sam)		</description>		<dc:date>2022-08-29T01:13:58Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/08/unsupervised_learning_sentenc">		<title>Unsupervised Learning — Sentence-Transformers documentation</title>		<link>http://www.semanlink.net/doc/2022/08/unsupervised_learning_sentenc</link>		<description>&gt; In our paper TSDAE we compare approaches for sentence embedding tasks, and in GPL we compare them for semantic search tasks (given a query, find relevant passages). While the unsupervised approach achieve acceptable performances for sentence embedding tasks, they perform poorly for semantic search tasks.		</description>		<dc:date>2022-08-20T01:16:16Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/08/train_and_fine_tune_sentence_tr">		<title>Train and Fine-Tune Sentence Transformers Models</title>		<link>http://www.semanlink.net/doc/2022/08/train_and_fine_tune_sentence_tr</link>		<dc:date>2022-08-13T09:49:57Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/07/2205_00820_entity_aware_trans">		<title>[2205.00820&#93; Entity-aware Transformers for Entity Search</title>		<link>http://www.semanlink.net/doc/2022/07/2205_00820_entity_aware_trans</link>		<description>&gt; **Do BERT-based entity
retrieval models benefit from additional entity information stored
in knowledge graphs?** To address this research question, we map
entity embeddings into the same input space as a pre-trained BERT
model and inject these entity embeddings into the BERT model.
This entity-enriched language model is then employed on the entity
retrieval task.

&gt; we observe empirically that
the entity-enriched BERT models **enable fine-tuning on limited
training data**, which otherwise would not be feasible due to the
known instabilities of BERT in few-sample fine-tuning

Uses [Wikipedia2Vec&#93;(tag:wikipedia2vec) as graph embedding method		</description>		<dc:date>2022-07-12T08:18:56Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/07/leshem_choshen_sur_twitter_c">		<title>Leshem Choshen sur Twitter : &quot;Computational (Chomskian) hierarchies can predict OOD capabilities...&quot;</title>		<link>http://www.semanlink.net/doc/2022/07/leshem_choshen_sur_twitter_c</link>		<description>About a paper by DeepMind [&quot;Neural Networks and the Chomsky Hierarchy&quot;&#93;(https://arxiv.org/abs/2207.02098)

&gt; for our subset of tasks, RNNs and Transformers fail to generalize on non-regular tasks... only networks augmented with structured memory (such as a stack or memory tape) can successfully generalize on context-free and context-sensitive tasks		</description>		<dc:date>2022-07-11T11:10:31Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/07/2206_10658_questions_are_all_">		<title>[2206.10658&#93; Questions Are All You Need to Train a Dense Passage Retriever</title>		<link>http://www.semanlink.net/doc/2022/07/2206_10658_questions_are_all_</link>		<description>&gt; **approach for training dense retrieval models that does not require any labeled training data**. Dense retrieval is a central challenge for open-domain tasks, such as Open QA, where state-of-the-art methods typically require large supervised datasets with custom hard-negative mining and denoising of positive examples.
&gt;
&gt; ART, in contrast, only requires access to unpaired inputs and outputs (e.g. questions and potential answer documents).
&gt;
&gt; It uses a new document-retrieval autoencoding scheme, where
&gt; 1. an input question is used to retrieve a set of evidence documents, and
&gt; 2. the documents are then used to compute the probability of reconstructing the original question.
&gt;
&gt; Training for retrieval based on question reconstruction enables effective unsupervised learning of both document and question encoders, which can be later incorporated into complete Open QA systems without any further finetuning. 

[Tweet&#93;(doc:2022/07/devendra_singh_sachan_sur_twitt)

&gt; Given an
input question, ART first retrieves a small set
of possible evidence documents. It then reconstructs
the original question by attending to these
documents
&gt;
&gt; The
key idea in ART is to consider the retrieved documents
as a noisy representation of the original
question and question reconstruction probability
as a way of denoising that provides soft-labels for
how likely each document is to have been the correct
result

Refers to [[IZACARD 2012.04584&#93; Distilling Knowledge from Reader to Retriever for Question Answering&#93;(doc:2020/12/2012_04584_distilling_knowled)		</description>		<dc:date>2022-07-06T23:39:29Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/06/unveiling_transformers_with_leg">		<title>Unveiling Transformers with LEGO - YouTube</title>		<link>http://www.semanlink.net/doc/2022/06/unveiling_transformers_with_leg</link>		<description>&gt; To me, what&apos;s good about transformers is that they have relative filters. I mean **a standard NN tests an input against a fixed filter w, but here we test part of x against another part of x**. (#[Self-Attention&#93;(tag:self_attention))
&gt;
&gt; This potentially allows for reasoning to emerge: the network can associate concepts that it encounters, compare them, make analogies

&gt; LEGO: Learning Equality and Group Operations. It&apos;s a very **basic reasoning task**, where a sentence is made of clauses defining variables as a function of some other variable, and the goal is to **resolve the value of the variables**.		</description>		<dc:date>2022-06-30T14:21:53Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/06/using_bert_for_classifying_docu">		<title>Using BERT For Classifying Documents with Long Texts | by Armand Olivares | Medium</title>		<link>http://www.semanlink.net/doc/2022/06/using_bert_for_classifying_docu</link>		<dc:date>2022-06-29T18:09:51Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/06/chris_olah_sur_twitter_i_m_e">		<title>Chris Olah sur Twitter : &quot;I&apos;m excited to *finally* be making progress on understanding the first MLP layer in large transformer LMs. I&apos;ve tried really hard and prior to SoLU had little success.&quot; / Twitter</title>		<link>http://www.semanlink.net/doc/2022/06/chris_olah_sur_twitter_i_m_e</link>		<dc:date>2022-06-27T19:48:41Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/06/google_ai_blog_limoe_learning">		<title>Google AI Blog: LIMoE: Learning Multiple Modalities with One Sparse Mixture-of-Experts Model</title>		<link>http://www.semanlink.net/doc/2022/06/google_ai_blog_limoe_learning</link>		<description>&gt; Sparse models stand out among the most promising approaches for the future of deep learning. 
Instead of every part of a model processing every input (“dense” modeling), sparse models employing conditional computation learn to route individual inputs to different “experts” in a potentially huge network		</description>		<dc:date>2022-06-26T01:20:55Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/06/sentence_bert_model_in_onnx_for">		<title>sentence bert model in onnx format · Issue #46 · UKPLab/sentence-transformers</title>		<link>http://www.semanlink.net/doc/2022/06/sentence_bert_model_in_onnx_for</link>		<dc:date>2022-06-13T12:38:47Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/06/2205_15952_knowledge_graph_">		<title>[2205.15952&#93; Knowledge Graph -- Deep Learning: A Case Study in Question Answering in Aviation Safety Domain</title>		<link>http://www.semanlink.net/doc/2022/06/2205_15952_knowledge_graph_</link>		<dc:date>2022-06-11T01:48:52Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/06/domain_transfer_with_ggpl_germ">		<title>Domain transfer with GGPL: German Generative Pseudo Labeling 🥨 | by Matthias Richter | Jun, 2022 | ML6team</title>		<link>http://www.semanlink.net/doc/2022/06/domain_transfer_with_ggpl_germ</link>		<dc:date>2022-06-02T13:55:12Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/06/nils_reimers_sur_twitter_gpl">		<title>Nils Reimers sur Twitter : &quot;GPL goes multi-lingual...&quot;</title>		<link>http://www.semanlink.net/doc/2022/06/nils_reimers_sur_twitter_gpl</link>		<description>[Domain transfer with GGPL: German Generative Pseudo Labeling&#93;(doc:2022/06/domain_transfer_with_ggpl_germ)		</description>		<dc:date>2022-06-01T17:45:24Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/05/2205_08184_skill_structured_">		<title>[2205.08184&#93; SKILL: Structured Knowledge Infusion for Large Language Models</title>		<link>http://www.semanlink.net/doc/2022/05/2205_08184_skill_structured_</link>
		<description>&gt; a method to infuse
structured knowledge into LLMs, by directly
training T5 models on factual triples of knowledge
graphs

&gt; The
models pre-trained on factual triples compare
competitively with the ones on natural language
sentences that contain the same knowledge.

&gt; The proposed method has an advantage that no alignment between the knowledge graph and text corpus is required

		</description>		<dc:date>2022-05-18T23:57:17Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/05/2205_05131_unifying_language_">		<title>[2205.05131&#93; Unifying Language Learning Paradigms</title>		<link>http://www.semanlink.net/doc/2022/05/2205_05131_unifying_language_</link>		<dc:date>2022-05-12T12:12:04Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/05/bertopic_the_future_of_topic_m">		<title>BERTopic: The Future of Topic Modeling | Pinecone</title>		<link>http://www.semanlink.net/doc/2022/05/bertopic_the_future_of_topic_m</link>		<dc:date>2022-05-12T09:01:55Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/05/2205_04260_ease_entity_aware">		<title>[2205.04260&#93; EASE: Entity-Aware Contrastive Learning of Sentence Embedding</title>		<link>http://www.semanlink.net/doc/2022/05/2205_04260_ease_entity_aware</link>		<description>&gt; we explore a type of supervision
that has been under-explored in the literature: entity
hyperlink annotations from Wikipedia.
&gt;
&gt; entities have been shown to
be a strong indicator of text semantics
&gt; 
&gt; a method for mining hard negatives
based on the entity type

Uses wikipedia2vec

&gt; the reliance on Wikipedia for training
data may limit the application of the models
to specific domains (e.g., general or encyclopedia
domains). To apply EASE to other domains, one
may need to annotate text from the domain either
manually or automatically.		</description>		<dc:date>2022-05-11T01:25:12Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/05/2203_08913_memorizing_transfo">		<title>[2203.08913&#93; Memorizing Transformers</title>		<link>http://www.semanlink.net/doc/2022/05/2203_08913_memorizing_transfo</link>		<description>[tweet&#93;(https://twitter.com/LiamFedus/status/1522605777961119745?s=20&amp;t=Jt9GBjNcFw6TqeqYvz_BRA): Memorizing Transformers which increases context length up to 262k by an external memory of (keys, values) for that document. 
- Matches quality of Transformers 5x larger
- Can fine-tune a prior pre-trained model to use it

&gt; Language models typically need to be trained or finetuned in order to acquire new knowledge, which involves updating their weights. We instead envision language models that can simply read and memorize new data at inference time, thus acquiring new knowledge immediately		</description>		<dc:date>2022-05-07T09:01:26Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/04/ramsri_goutham_golla_sur_twitte">		<title>Ramsri Goutham Golla sur Twitter : &quot;Hi @Nils_Reimers For GPL you used &quot;msmarco-distilbert-base-tas-b&quot; model and ...&quot;</title>		<link>http://www.semanlink.net/doc/2022/04/ramsri_goutham_golla_sur_twitte</link>		<dc:date>2022-04-27T22:17:10Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/04/1909_00426_global_entity_disa">		<title>[1909.00426&#93; Global Entity Disambiguation with BERT</title>		<link>http://www.semanlink.net/doc/2022/04/1909_00426_global_entity_disa</link>		<dc:date>2022-04-18T19:49:22Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/04/2110_08151_mluke_the_power_o">		<title>[2110.08151&#93; mLUKE: The Power of Entity Representations in Multilingual Pretrained Language Models</title>		<link>http://www.semanlink.net/doc/2022/04/2110_08151_mluke_the_power_o</link>		<description>[Ikuya Yamada sur Twitter : &quot;Is entity representation effective to improve multilingual language models?...&quot;&#93;(doc:2022/04/ikuya_yamada_sur_twitter_is_)

&gt; Recent studies have shown that multilingual pretrained language models can be effectively improved with cross-lingual alignment information from Wikipedia entities. However, **existing methods only exploit entity information in pretraining and do not explicitly use entities in downstream tasks**. In this study, we explore the **effectiveness of leveraging entity representations for downstream cross-lingual tasks**.
&gt;
&gt; the key insight is that incorporating entity representations into the input allows us to extract more language-agnostic features. 

[Github&#93;(https://github.com/studio-ousia/luke)

&gt; Entity representations are known to enhance
language models in mono-lingual settings
(Zhang et al., 2019: [ERNIE&#93;(tag:ernie.html); Peters et al., 2019:  [[1909.04164&#93; Knowledge Enhanced Contextual Word Representations&#93;(doc:2020/05/1909_04164_knowledge_enhanced); Wang et al.,
2021 [[1911.06136&#93; KEPLER: A Unified Model for Knowledge Embedding and Pre-trained Language Representation&#93;(doc:2020/11/1911_06136_kepler_a_unified_); Xiong et al., 2020; Yamada et al., 2020: [[2010.01057&#93; LUKE: Deep Contextualized Entity Representations with Entity-aware Self-attention&#93;(doc:2020/11/2010_01057_luke_deep_context))
presumably by introducing real-world knowledge.
We show that using entity representations facilitates
cross-lingual transfer by providing language-independent
features.
&gt;
&gt; Multilingual extension of LUKE. The model is trained with the multilingual
masked language modeling (MLM) task as well
as the masked entity prediction (MEP) task with
Wikipedia entity embeddings

&gt; We investigate two ways of using the entity representations
in cross-lingual transfer tasks:
&gt; 1. perform
entity linking for the input text, and append
the detected entity tokens to the input sequence.
The entity tokens are expected to provide language-independent
features to the model
&gt; 2. use the entity
[MASK&#93; token from the MEP task as a language-independent
feature extractor.		</description>		<dc:date>2022-04-17T23:20:52Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/04/ikuya_yamada_sur_twitter_is_">		<title>Ikuya Yamada sur Twitter : &quot;Is entity representation effective to improve multilingual language models?...&quot;</title>		<link>http://www.semanlink.net/doc/2022/04/ikuya_yamada_sur_twitter_is_</link>		<description>[[2110.08151&#93; mLUKE: The Power of Entity Representations in Multilingual Pretrained Language Models&#93;(doc:2022/04/2110_08151_mluke_the_power_o)

&gt; mLUKE, an extension of [LUKE&#93;(tag:luke) based on 1M Wikidata entity embeddings shared across languages

&gt; mLUKE solves downstream tasks by using its language-agnostic entity embeddings as inputs. 

&gt; entity representations are shared across languages during pretraining -&gt; they are much more language-agnostic than word representations		</description>		<dc:date>2022-04-13T15:46:06Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/04/tu_vu_sur_twitter_enormous_l">		<title>Tu Vu sur Twitter : &quot;Enormous LMs like GPT-3 exhibit impressive few-shot performance, but w/ self-training a BERT base sized model can achieve much better results!</title>		<link>http://www.semanlink.net/doc/2022/04/tu_vu_sur_twitter_enormous_l</link>		<description>&gt; [[2109.06270&#93; STraTA: Self-Training with Task Augmentation for Better Few-shot Learning&#93;(doc:2022/04/2109_06270_strata_self_train)

[Github&#93;(https://github.com/google-research/google-research/tree/master/STraTA) [at HuggingFace&#93;(https://github.com/huggingface/transformers/tree/main/examples/research_projects/self-training-text-classification)

--
Remark: Like [[2203.10581&#93; Cluster &amp; Tune: Boost Cold Start Performance in Text Classification&#93;(doc:2022/04/2203_10581_cluster_tune_bo), adds an intermediate fine-tuning step // TODO compare		</description>		<dc:date>2022-04-13T13:37:58Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/04/google_ai_blog_pathways_langua">		<title>Google AI Blog: Pathways Language Model (PaLM): Scaling to 540 Billion Parameters for Breakthrough Performance</title>		<link>http://www.semanlink.net/doc/2022/04/google_ai_blog_pathways_langua</link>		<dc:date>2022-04-05T22:16:07Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/04/2008_11228_a_simple_method_fo">		<title>[2008.11228&#93; A simple method for domain adaptation of sentence embeddings</title>		<link>http://www.semanlink.net/doc/2022/04/2008_11228_a_simple_method_fo</link>		<dc:date>2022-04-01T14:07:28Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/03/2004_05119_beyond_fine_tuning">		<title>[2004.05119&#93; Beyond Fine-tuning: Few-Sample Sentence Embedding Transfer</title>		<link>http://www.semanlink.net/doc/2022/03/2004_05119_beyond_fine_tuning</link>		<description>&gt; Fine-tuning (FT) pre-trained sentence embedding models on small datasets has been shown to have limitations. 
In this paper we show that concatenating the embeddings from the pre-trained model with those from a simple sentence embedding model trained only on the target data, can improve over the performance of FT for few-sample tasks		</description>		<dc:date>2022-03-31T21:04:02Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/03/sentence_transformer_fine_tunin">		<title>Sentence Transformer Fine-Tuning (SetFit): Outperforming GPT-3 on few-shot Text-Classification while being 1600 times smaller | by Moshe Wasserblat (2021-12)</title>		<link>http://www.semanlink.net/doc/2022/03/sentence_transformer_fine_tunin</link>		<description>Finetuning d&apos;un SBERT sur une tâche de classification (in fine, produit un SBERT)

&gt; **Few-shot text classification  based on fine-tuning a Sentence Transformer with task-specific data** that can easily be implemented with the sentence-transformers library

&gt; Surprisingly, we did not find any
work that performed an end-to-end ST fine-tuning for text classification in
a Siamese manner.

[COLAB&#93;(https://colab.research.google.com/github/MosheWasserb/SetFit/blob/main/SetFit_SST_2.ipynb)

[Nils Reimers sur Twitter&#93;(doc:2022/03/nils_reimers_sur_twitter_gre)		</description>		<dc:date>2022-03-31T10:49:48Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/03/nils_reimers_sur_twitter_gre">		<title>Nils Reimers sur Twitter : &quot;Great post on SetFit&quot;</title>		<link>http://www.semanlink.net/doc/2022/03/nils_reimers_sur_twitter_gre</link>		<description>About [Sentence Transformer Fine-Tuning (SetFit): Outperforming GPT-3 on few-shot Text-Classification while being 1600 times smaller | by Moshe Wasserblat&#93;(doc:2022/03/sentence_transformer_fine_tunin)
&gt; - Outperforms GPT-3 in few-shot text-classification (50 labeled examples, secret test set)
&gt; - 1600 times smaller
&gt; - Can be run on your CPU
&gt; - No limitation on the number of training examples
&gt; - Just few lines of code needed		</description>		<dc:date>2022-03-31T10:48:50Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/03/sentence_embedding_fine_tuning_">		<title>Sentence Embedding Fine-tuning for the French Language | by La Javaness R&amp;D | Feb, 2022 | Medium</title>		<link>http://www.semanlink.net/doc/2022/03/sentence_embedding_fine_tuning_</link>		<dc:date>2022-03-31T10:06:14Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/03/domain_adaptation_sentence_tr">		<title>Domain Adaptation — Sentence-Transformers documentation</title>		<link>http://www.semanlink.net/doc/2022/03/domain_adaptation_sentence_tr</link>		<dc:date>2022-03-31T08:59:25Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/03/2203_14655_few_shot_learning_">		<title>[2203.14655&#93; Few-Shot Learning with Siamese Networks and Label Tuning</title>		<link>http://www.semanlink.net/doc/2022/03/2203_14655_few_shot_learning_</link>		<description>&gt; the problem of building text classifiers with little or no training data.
&gt;
&gt; In recent years, an approach based on neural textual entailment models has been found to give strong results on a diverse range of tasks. 

(cf. #[NLI&#93;(tag:nli), using the input text as the premise and the text representing the label as the hypothesis)

&gt; In this work, we show that **with proper pre-training, Siamese Networks that embed texts and labels** offer a competitive alternative.
&gt;
&gt; We introduce **label tuning: fine-tuning the label embeddings only**. While giving lower performance than model fine-tuning (which updates all params of the model), this approach has the architectural advantage that a single encoder can be shared by many different tasks (we only fine-tune the label embeddings)
&gt; The drop in quality can
be compensated by using a variant of **[Knowledge distillation&#93;(tag:knowledge_distillation)**

[Github&#93;(https://tinyurl.com/label-tuning), [Tweet&#93;(doc:2022/03/thomas_muller_sur_twitter_pa)		</description>		<dc:date>2022-03-30T16:14:44Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/03/2006_05987_revisiting_few_sam">		<title>[2006.05987&#93; Revisiting Few-sample BERT Fine-tuning</title>		<link>http://www.semanlink.net/doc/2022/03/2006_05987_revisiting_few_sam</link>		<description>&gt; A study of fine-tuning of BERT contextual representations, with focus on commonly observed instabilities in few-sample scenarios.

&gt; The most commonly used optimizer for fine-tuning BERT is BERTADAM, a modified version of
the ADAM first-order stochastic optimization method. It differs from the original ADAM algorithm
(Kingma &amp; Ba, 2014) in omitting a bias correction step.
&gt;
&gt; ... We observe that
the bias correction omission influences the learning rate, especially early in the fine-tuning process,
and is one of the primary reasons for instability in fine-tuning BERT

and this is harmful when fine-tuning with fewer than 10K samples. The problem is present in many
&gt; open source libraries, including the official
implementation huggingface’s Transformers

How to solve the problem in HuggingFace?

&gt; HuggingFace Transformers AdamW has correct_bias parameter set to True by default. Still it&apos;s worth noting the importance this parameter serves. [src&#93;(doc:2022/08/on_stability_of_few_sample_tran)

		</description>		<dc:date>2022-03-21T10:46:15Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/03/nlp_%7C_how_to_add_a_domain_speci">		<title>NLP | How to add a domain-specific vocabulary (new tokens) to a subword tokenizer already trained like BERT WordPiece | by Pierre Guillou | Medium</title>		<link>http://www.semanlink.net/doc/2022/03/nlp_%7C_how_to_add_a_domain_speci</link>		<dc:date>2022-03-18T17:41:40Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/03/studio_ousia_sur_twitter_now">		<title>Studio Ousia sur Twitter : &quot;Now using LUKE is easier than ever!&quot; / Twitter</title>		<link>http://www.semanlink.net/doc/2022/03/studio_ousia_sur_twitter_now</link>		<dc:date>2022-03-15T20:47:39Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/03/andrew_trask_about_large_langua">		<title>Andrew Trask about large language models: The &quot;bigness&quot; is a temporary flaw, not a permanent feature of progress&quot;</title>		<link>http://www.semanlink.net/doc/2022/03/andrew_trask_about_large_langua</link>		<dc:date>2022-03-13T09:16:01Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/03/maartengr_bertopic_leveraging_">		<title>MaartenGr/BERTopic: Leveraging BERT and c-TF-IDF to create easily interpretable topics.</title>		<link>http://www.semanlink.net/doc/2022/03/maartengr_bertopic_leveraging_</link>		<description>&gt; topic modeling technique that leverages 🤗 transformers and [c-TF-IDF&#93;(https://github.com/MaartenGr/cTFIDF) to create dense clusters allowing for easily interpretable topics whilst keeping important words in the topic descriptions.

refers to [Top2Vec&#93;(doc:2022/03/ddangelov_top2vec_top2vec_lear)

[youtube&#93;(https://www.youtube.com/watch?v=Qub3PrFvauI)

[tweet&#93;(https://twitter.com/JayAlammar/status/1594681648121102336?s=20&amp;t=R0G_LrajK9WBtzypwXtD7Q)		</description>		<dc:date>2022-03-10T09:41:50Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/03/document_matching_for_job_descr">		<title>Document Matching for Job Descriptions | Semantic Scholar (2021)</title>		<link>http://www.semanlink.net/doc/2022/03/document_matching_for_job_descr</link>		<description>&gt; We train a document encoder to match online job descriptions to one of many standardized job roles from Singapore’s Skills Framework. The encoder generates semantically meaningful document encodings from textual descriptions of job roles, which are then compared using Cosine Similarity to determine matching. During training, we implement the methodology used by Sentence-BERT, fine tuning pre-trained BERT models using a siamese network architecture on labelled document pairs.		</description>		<dc:date>2022-03-09T18:18:50Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/03/naver_labs_europe_nils_reim">		<title>NAVER LABS Europe : &quot;@Nils_Reimers of @huggingface on &apos;Unsupervised domain adaptation for neural search&apos;&quot;</title>		<link>http://www.semanlink.net/doc/2022/03/naver_labs_europe_nils_reim</link>		<dc:date>2022-03-09T10:53:24Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/02/2109_06304_phrase_bert_impro">		<title>[2109.06304&#93; Phrase-BERT: Improved Phrase Embeddings from BERT with an Application to Corpus Exploration</title>		<link>http://www.semanlink.net/doc/2022/02/2109_06304_phrase_bert_impro</link>		<dc:date>2022-02-25T17:19:37Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/02/nils_reimers_sur_twitter_cre">		<title>Nils Reimers sur Twitter : &quot;Creating intent classes for chatbots is challenging This tutorial shows how to use sentence-transformers to find potentially overlapping intent classes and how to 
improve your data annotation work.&quot; / Twitter</title>		<link>http://www.semanlink.net/doc/2022/02/nils_reimers_sur_twitter_cre</link>		<dc:date>2022-02-19T22:55:07Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/02/nils_reimers_sur_twitter_how">		<title>Nils Reimers sur Twitter : &quot;how to use the fast clustering algorithm from sentence-transformers...&quot;</title>		<link>http://www.semanlink.net/doc/2022/02/nils_reimers_sur_twitter_how</link>		<description>Clustering millions of sentences to optimize the ML-workflow		</description>		<dc:date>2022-02-19T10:37:15Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/02/sentence_transformers_fast_clus">		<title>sentence-transformers/fast_clustering.py at master · UKPLab/sentence-transformers</title>		<link>http://www.semanlink.net/doc/2022/02/sentence_transformers_fast_clus</link>		<description>&gt; This is a more complex example on performing clustering on large scale dataset. This examples find in a large set of sentences local communities, i.e., groups of sentences that are highly similar. You can freely configure the threshold what is considered as similar. A high threshold will only find extremely similar sentences, a lower threshold will find more sentence that are less similar. A second parameter is &apos;min_community_size&apos;: Only communities with at least a certain number of sentences will be returned. The method for finding the communities is extremely fast, for clustering 50k sentences it requires only 5 seconds (plus embedding comuptation). In this example, we download a large set of questions from Quora and then find similar questions in this set.		
</description>		<dc:date>2022-02-18T14:45:22Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/01/gsarti_scibert_nli_%C2%B7_hugging_fa">		<title>gsarti/scibert-nli · Hugging Face</title>		<link>http://www.semanlink.net/doc/2022/01/gsarti_scibert_nli_%C2%B7_hugging_fa</link>		<description>SciBERT fine-tuned on the SNLI and the MultiNLI datasets using the sentence-transformers library to produce universal sentence embeddings		</description>		<dc:date>2022-01-29T15:52:08Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/01/semantic_search_sentence_tran">		<title>Semantic Search — Sentence-Transformers documentation</title>		<link>http://www.semanlink.net/doc/2022/01/semantic_search_sentence_tran</link>		<description>**symmetric** semantic search vs **asymmetric** semantic search

&gt; - Suitable models for symmetric semantic search: Pre-Trained Sentence Embedding
&gt; - Suitable models for asymmetric semantic search: Pre-Trained MS MARCO Models		</description>		<dc:date>2022-01-29T15:28:25Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/01/1906_00300_latent_retrieval_f">		<title>[1906.00300&#93; Latent Retrieval for Weakly Supervised Open Domain Question Answering</title>		<link>http://www.semanlink.net/doc/2022/01/1906_00300_latent_retrieval_f</link>		<description>&gt; The key insight of this work is that end-to-end learning is possible if we pre-train the retriever with an unsupervised Inverse Cloze Task (ICT). In ICT, a sentence is treated as a pseudo- question, and its context is treated as pseudo- evidence		</description>		<dc:date>2022-01-11T11:06:38Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2022/01/domain_transfer_with_bert_%7C_pin">		<title>Domain Transfer with BERT | Pinecone</title>		<link>http://www.semanlink.net/doc/2022/01/domain_transfer_with_bert_%7C_pin</link>		<dc:date>2022-01-04T21:00:34Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/12/anthropic_sur_twitter_a_math">		<title>Anthropic sur Twitter : &quot;a mathematical framework for trying to reverse engineer transformer language models...&quot;</title>		<link>http://www.semanlink.net/doc/2021/12/anthropic_sur_twitter_a_math</link>		<dc:date>2021-12-23T00:41:38Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/12/making_the_most_of_data_augmen">		<title>Making the Most of Data: Augmentation with BERT | Pinecone</title>		<link>http://www.semanlink.net/doc/2021/12/making_the_most_of_data_augmen</link>		<dc:date>2021-12-18T10:05:41Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/12/using_pretrained_sbert_model_in">		<title>Using pretrained SBERT model in cross-encoder · Issue #726 · UKPLab/sentence-transformers</title>		<link>http://www.semanlink.net/doc/2021/12/using_pretrained_sbert_model_in</link>		
<description>&gt; so would it be a good idea to finetune a SBERT model on a cross-encoder task?
&gt;
&gt; The SBERT models are regular transformers model and hence can be used as base for cross encoders. Sometimes it could be helpful, otherwise it is better to use the original models. ([Nils Reimers&#93;(tag:nils_reimers))		</description>		<dc:date>2021-12-17T00:41:33Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/12/advance_bert_model_via_transfer">		<title>Advance BERT model via transferring knowledge from Cross-Encoders to Bi-Encoders | by Chien Vu | Towards Data Science</title>		<link>http://www.semanlink.net/doc/2021/12/advance_bert_model_via_transfer</link>		<description>Data Augmentation Method to improve SBERT Bi-Encoders for Pairwise Sentence Scoring Tasks (Semantic sentence tasks)		</description>		<dc:date>2021-12-17T00:26:39Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/12/2112_07577_gpl_generative_ps">		<title>[2112.07577&#93; GPL: Generative Pseudo Labeling for Unsupervised Domain Adaptation of Dense Retrieval</title>		<link>http://www.semanlink.net/doc/2021/12/2112_07577_gpl_generative_ps</link>		<description>An unsupervised domain adaptation technique for dense retrieval models

1. synthetic queries
are generated for each passage from the target corpus (using an existing pre-trained [T5&#93;(tag:text_to_text_transfer_transformer)
encoder-decoder)
2. the generated queries are used for mining negative
passages (retrieving the most similar
paragraphs using an existing dense retrieval
model == hard negatives!)
3. the query-passage pairs are labeled by a cross-encoder and used to train the domain-adapted
dense retriever (using method described in [Hofstätter et al.,
2020&#93;(doc:2021/12/2010_02666_improving_efficien))

[Nils Reimers sur Twitter&#93;(doc:2021/12/nils_reimers_sur_twitter_do_), [GitHub&#93;(https://github.com/UKPLab/gpl),  by the author of [TSDAE&#93;(doc:2021/09/2104_06979_tsdae_using_trans)

Claims to improve &quot;Doc2Query&quot; [Document Expansion by Query Prediction&#93;(doc:2022/01/1904_08375_document_expansion): ([src&#93;(https://twitter.com/KexinWang2049/status/1471435779415150598))

&gt; - GPL: Uses doc2query to construct synthetic data and does knowledge distillation (i.e. training) on that data.
&gt; - Doc2query: Generates queries to extend the documents and use BM25 on top of them w/o training.		</description>		<dc:date>2021-12-15T18:23:28Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/12/improving_language_models_by_re">		<title>Improving Language Models by Retrieving from Trillions of Tokens | DeepMind</title>		<link>http://www.semanlink.net/doc/2021/12/improving_language_models_by_re</link>		<description>&gt; Retrieval-Enhanced Transformer (Retro)		</description>		<dc:date>2021-12-09T10:11:10Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/12/semantic_search_through_a_vecto">		<title>Semantic search through a vectorized Wikipedia (SentenceBERT) with the Weaviate vector search engine</title>		<link>http://www.semanlink.net/doc/2021/12/semantic_search_through_a_vecto</link>		<dc:date>2021-12-05T10:48:53Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/12/unsupervised_extractive_summari">		<title>Unsupervised_Extractive_Summarization - a Hugging Face Space by Hellisotherpeople</title>		<link>http://www.semanlink.net/doc/2021/12/unsupervised_extractive_summari</link>		<description>Unsupervised Extractive Text Summarization and Semantic Search

[Github&#93;(https://github.com/Hellisotherpeople/CX_DB8)		</description>		<dc:date>2021-12-03T09:28:38Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/11/unsupervised_training_for_sente">		<title>Unsupervised Training for Sentence Transformers | Pinecone</title>		<link>http://www.semanlink.net/doc/2021/11/unsupervised_training_for_sente</link>		<description>Blog post about [[2104.06979&#93; TSDAE: Using Transformer-based Sequential Denoising Auto-Encoder for Unsupervised Sentence Embedding Learning&#93;(doc:2021/09/2104_06979_tsdae_using_trans)

&gt; Fine-tuning with TSDAE simply cannot compete in terms of performance against supervised methods.
However, **the point and value of TSDAE is that it allows us to fine-tune models for use-cases where we have no data**. Specific domains with unique terminology or low resource languages.		</description>		<dc:date>2021-11-24T21:03:44Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/11/how_to_fine_tune_sentence_bert_">		<title>How to Fine-Tune Sentence-BERT for Question Answering | Capital One</title>		<link>http://www.semanlink.net/doc/2021/11/how_to_fine_tune_sentence_bert_</link>		<description>&gt; tutorial on using the sentence-transformers library to fine-tune Sentence-BERT for question matching		</description>		<dc:date>2021-11-21T12:38:13Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/11/multilingual_sentence_transform">		<title>Multilingual Sentence Transformers | Pinecone</title>		<link>http://www.semanlink.net/doc/2021/11/multilingual_sentence_transform</link>		<description>How to make a text encoder multilingual using sentence transformers and multilingual knowledge distillation.		</description>		<dc:date>2021-11-04T23:09:34Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/11/mixed_negative_sampling_for_lea">		<title>Mixed Negative Sampling for Learning Two-tower Neural Networks in Recommendations – Google Research (WWW 2020)</title>		<link>http://www.semanlink.net/doc/2021/11/mixed_negative_sampling_for_lea</link>		<description>&gt; a novel negative sampling approach called **Mixed Negative Sampling (MNS**). In particular, different from commonly used batch or unigram sampling methods, MNS uses a mixture of batch and uniformly sampled negatives to tackle the selection bias of implicit user feedback

(voir si ça a un rapport avec [Multiple Negatives Ranking Loss&#93;(doc:2021/10/next_gen_sentence_embeddings_wi))		</description>		<dc:date>2021-11-04T17:31:42Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/11/train_embeddings_by_using_the_t">		<title>Train embeddings by using the Two-Tower built-in algorithm  |  Vertex AI</title>		<link>http://www.semanlink.net/doc/2021/11/train_embeddings_by_using_the_t</link>		<description>&gt; The Two-Tower model pairs similar types of objects, such as user profiles, search queries, web documents, answer passages, or images, in the same vector space, so that related items are close to each other. **The Two-Tower model consists of two encoder towers: the query tower and the candidate tower**. These towers embed independent items into a shared embedding space, which lets Matching Engine retrieve similarly matched items.
&gt;
&gt; To train a Two-Tower model, Google uses **pairs of relevant items**. Each pair consists of a query document and a candidate document. Documents contain arbitrary customer-defined features including text, numeric, and categorical features. After training, the Two-Tower built-in algorithm exports two TensorFlow SavedModels—a query encoder and a candidate encoder... Given a query item, Matching Engine uses the query encoder to generate a query embedding, and uses the index to find similar candidate embeddings. Matching Engine uses the candidate encoder to index all the items and serve them by using an approximate nearest neighbor solution.		</description>		<dc:date>2021-11-04T17:23:31Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/10/on_the_stability_of_fine_tuning">		<title>On the Stability of Fine-tuning BERT: Misconceptions, Explanations, and Strong Baselines (2021)</title>		<link>http://www.semanlink.net/doc/2021/10/on_the_stability_of_fine_tuning</link>		<description>&gt; **an analysis of the fine-tuning instability of BERT-based models and a simple method to fix it**
&gt;
&gt; Despite the strong empirical performance of fine-tuned models, fine-tuning is an unstable process: training the same model with multiple random seeds can result in a large variance of the task performance.
&gt;
&gt; 2 potential reasons identified in (Devlin et al., 2019; Lee et al., 2020; Dodge et al., 2020) : 
&gt; - catastrophic forgetting 
&gt; - small size of the fine-tuning datasets. 
&gt;
&gt; we show that both hypotheses fail to explain the fine-tuning instability, which is caused by optimization difficulties (**vanishing gradients**). 
&gt;
&gt; A simple but strong baseline that makes fine-tuning BERT-based models significantly more stable than the previously proposed approaches.
&gt;
&gt; [Github&#93;(https://github.com/uds-lsv/bert-stable-fine-tuning)		</description>		<dc:date>2021-10-30T09:14:09Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/10/next_gen_sentence_embeddings_wi">		<title>Next-Gen Sentence Embeddings with Multiple Negatives Ranking Loss | Pinecone</title>		<link>http://www.semanlink.net/doc/2021/10/next_gen_sentence_embeddings_wi</link>		<description>&gt; the world of sentence embeddings was ignited with the introduction of SBERT in 2019. Since then, many more sentence transformers have been introduced. These models quickly made the original SBERT obsolete. How did these newer sentence transformers manage to outperform SBERT so quickly? The answer is **multiple negatives ranking (MNR) loss**.

&gt; In short; **fine-tune your models with MNR loss, and do it with the [sentence-transformers&#93;(tag:sbert) library**.

(mentionned in a [tweet&#93;(https://twitter.com/Nils_Reimers/status/1453001422400856086) by [Nils Reimers&#93;(tag:nils_reimers))		</description>		<dc:date>2021-10-27T01:24:49Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/10/sentence_embeddings_and_transfo">		<title>Sentence Embeddings and Transformers | Pinecone</title>		<link>http://www.semanlink.net/doc/2021/10/sentence_embeddings_and_transfo</link>		<dc:date>2021-10-23T01:04:37Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/10/alphafold_2_is_here_what%E2%80%99s_beh">		<title>AlphaFold 2 is here: what’s behind the structure prediction miracle | Oxford Protein Informatics Group</title>		<link>http://www.semanlink.net/doc/2021/10/alphafold_2_is_here_what%E2%80%99s_beh</link>		<description>&gt; to recap: AlphaFold 2 finds similar sequences to the input, extracts the information using an especial neural network architecture (a transformer), and then passes that information to another neural network that produces a structure.		
</description>		<dc:date>2021-10-20T00:31:53Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/10/l%E2%80%99intelligence_artificielle_ge">		<title>L’intelligence artificielle, génie de la biologie moléculaire</title>		<link>http://www.semanlink.net/doc/2021/10/l%E2%80%99intelligence_artificielle_ge</link>		<dc:date>2021-10-20T00:26:36Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/10/sahajtomar_french_semantic_%C2%B7_hu">		<title>Sahajtomar/french_semantic · Hugging Face</title>		<link>http://www.semanlink.net/doc/2021/10/sahajtomar_french_semantic_%C2%B7_hu</link>		<dc:date>2021-10-14T16:08:39Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/10/omer_levy_sur_twitter_what_i">		<title>Omer Levy sur Twitter : &quot;What if I told you that fine-tuning T5-Large (0.8B params) on a couple hundred examples could outperform GPT-3 (175B params) on a bunch of tasks?&quot;</title>		<link>http://www.semanlink.net/doc/2021/10/omer_levy_sur_twitter_what_i</link>		<dc:date>2021-10-13T12:53:20Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/10/google_ai_blog_exploring_trans">		<title>Google AI Blog: Exploring Transfer Learning with T5: the Text-To-Text Transfer Transformer (2020)</title>		<link>http://www.semanlink.net/doc/2021/10/google_ai_blog_exploring_trans</link>		<description>&gt; With T5, we propose reframing all NLP tasks into a unified text-to-text-format where the input and output are always text strings, in contrast to BERT-style models that can only output either a class label or a span of the input. 
Our text-to-text framework allows us to use the same model, loss function, and hyperparameters on any NLP task, including machine translation, document summarization, question answering, and classification tasks		</description>		<dc:date>2021-10-13T12:49:44Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/09/2106_04647_compacter_efficie">		<title>[2106.04647&#93; Compacter: Efficient Low-Rank Hypercomplex Adapter Layers</title>		<link>http://www.semanlink.net/doc/2021/09/2106_04647_compacter_efficie</link>		<description>&gt; Compacter (Compact Adapter) layers, a method to adapt large-scale language models, which only trains around 0.05% of a model&apos;s parameters and performs on par with fine-tuning. [twitter&#93;(https://twitter.com/KarimiRabeeh/status/1404774464441794560)		</description>		<dc:date>2021-09-29T02:05:29Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/09/2010_12566_dict_mlm_improved">		<title>[2010.12566&#93; DICT-MLM: Improved Multilingual Pre-Training using Bilingual Dictionaries</title>		<link>http://www.semanlink.net/doc/2021/09/2010_12566_dict_mlm_improved</link>		<description>&gt; Despite the strong representation learning capability enabled by MLM, we demonstrate an inherent limitation of MLM for multilingual representation learning. In particular, by requiring the model to predict the language-specific token, the MLM objective disincentivizes learning a language-agnostic representation -- which is a key goal of multilingual pre-training
&gt;
&gt; DICT-MLM works by incentivizing the model
to be able to predict not just the original
masked word, but potentially any of its cross-lingual
synonyms as well.		</description>		<dc:date>2021-09-06T18:27:44Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/07/google_ai_blog_from_vision_to_">		<title>Google AI Blog: From Vision to Language: Semi-supervised Learning in Action…at Scale</title>		<link>http://www.semanlink.net/doc/2021/07/google_ai_blog_from_vision_to_</link>		<description>Semi-Supervised Distillation (SSD). First, the teacher model infers pseudo-labels on the unlabeled dataset from which we then train a new teacher model (T’) that is of equal-or-larger size than the original teacher model. This step, which is essentially self-training, is then followed by knowledge distillation to produce a smaller student model for production.		</description>		<dc:date>2021-07-14T23:34:40Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/06/2102_07043_reasoning_over_vir">		<title>[2102.07043&#93; Reasoning Over Virtual Knowledge Bases With Open Predicate Relations</title>		<link>http://www.semanlink.net/doc/2021/06/2102_07043_reasoning_over_vir</link>		<description>&gt; a method for constructing **a virtual KB (VKB) trained entirely from text**

Open Predicate Query Language (OPQL): constructing a virtual knowledge base (VKB) that supports KB reasoning &amp; open-domain QA, tackling the incompleteness of knowledge bases by constructing a virtual KB only from text

&gt; OPQL constructs
a VKB by **encoding and indexing a set of
relation mentions** in a way that naturally enables
reasoning and can be trained without any structured
supervision.

&gt; can be used
as an **external memory integrated into a language
model**

cf. this earlier paper [[2002.10640&#93; Differentiable Reasoning over a Virtual Knowledge Base&#93;(doc:2020/07/2002_10640_differentiable_rea). But does not require an initial structured KB for distant
supervision.

&gt; The key idea in constructing the OPQL VKB is to use a
dual-encoder pre-training process, similar to 
[[1906.03158&#93; Matching the Blanks: Distributional Similarity for Relation Learning&#93;(doc:2021/05/1906_03158_matching_the_blank)

Related work section refers to [[1909.04164&#93; Knowledge Enhanced Contextual Word Representations&#93;(doc:2020/05/1909_04164_knowledge_enhanced). Also refers to [[2007.00849&#93; Facts as Experts: Adaptable and Interpretable Neural Memory over Symbolic Knowledge&#93;(doc:2020/07/2007_00849_facts_as_experts_) (some authors in common)		</description>		<dc:date>2021-06-20T08:30:31Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/06/semantic_search_with_s_bert_is_">		<title>Semantic Search with S-BERT is all you need</title>		<link>http://www.semanlink.net/doc/2021/06/semantic_search_with_s_bert_is_</link>		<description>&gt; SentenceTransformers is designed in such way that fine-tuning your own sentence / text embeddings models is easy.		</description>		<dc:date>2021-06-05T16:02:26Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/05/making_sense_of_raw_input">		<title>Making sense of raw input</title>		<link>http://www.semanlink.net/doc/2021/05/making_sense_of_raw_input</link>		<description>&gt;... this way we are able to **jointly learn** how to perceive (**mapping raw sensory information to concepts**) and apperceive (**combining concepts into declarative rules**)

cf. [Making sense of sensory input&#93;(doc:2021/04/1910_02227_making_sense_of_se)		</description>		<dc:date>2021-05-21T12:09:43Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/05/1906_03158_matching_the_blank">		<title>[1906.03158&#93; Matching the Blanks: Distributional Similarity for Relation Learning</title>		<link>http://www.semanlink.net/doc/2021/05/1906_03158_matching_the_blank</link>		<description>&gt; a new method
of learning relation representations directly from
text
&gt;
&gt; First, we study the **ability of the Transformer
neural network architecture (Vaswani et al., 2017)
to encode relations between entity pairs**, and we
identify a method of representation that outperforms
previous work in supervised relation extraction.
Then, we present a method of training this relation
representation **without any supervision from
a knowledge graph or human annotators** from widely available distant supervision
in the form of entity linked text
&gt;
&gt; **we assume** access
to a corpus of text in which entities have been
linked to unique identifiers and we define a relation statement to be a block of text containing two
marked entities.		</description>		<dc:date>2021-05-13T00:39:03Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/05/1909_10506_learning_dense_rep">		<title>[1909.10506&#93; Learning Dense Representations for Entity Retrieval</title>		<link>http://www.semanlink.net/doc/2021/05/1909_10506_learning_dense_rep</link>		<description>&gt; We show that it is feasible to perform **entity
linking by training a dual encoder (two-tower)
model that encodes mentions and entities in
the same dense vector space**, where candidate
entities are retrieved by approximate nearest
neighbor search. Unlike prior work, **this setup
does not rely on an alias table followed by a
re-ranker, and is thus the first fully learned entity
retrieval model**.

Contributions:

&gt; -  a dual encoder architecture for
learning entity and mention encodings suitable for
retrieval. A key feature of the architecture is that it
employs a modular **hierarchy of sub-encoders that
capture different aspects of mentions and entities**
&gt; - a simple, fully unsupervised **hard negative
mining** strategy that produces massive gains
in retrieval performance, compared to using only
random negatives
&gt; - high
quality candidate entities very efficiently using approximate nearest neighbor search
&gt; - outperforms discrete retrieval
baselines like an alias table or BM25

&gt; strong retrieval
performance across all 5.7 million Wikipedia entities in
around 3ms per mention

&gt; since we are using a two-tower or dual
encoder architecture, **our model cannot use any kind of attention over
both mentions and entities at once**, nor feature-wise
comparisons as done by Francis-Landau et al. (2016).
This is a fairly severe constraint – for example, **we cannot
directly compare the mention span to the entity title**
– but it permits retrieval with nearest neighbor search
for the entire context against a single, all encompassing
representation for each entity		</description>		<dc:date>2021-05-01T09:11:15Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/04/nils_reimers_sur_twitter_sbe">		<title>Nils Reimers sur Twitter : &quot;SBERT Release v1.1.0&quot;</title>		<link>http://www.semanlink.net/doc/2021/04/nils_reimers_sur_twitter_sbe</link>		<dc:date>2021-04-22T19:35:49Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/04/2011_05864_on_the_sentence_em">		<title>[2011.05864&#93; On the Sentence Embeddings from Pre-trained Language Models</title>		<link>http://www.semanlink.net/doc/2021/04/2011_05864_on_the_sentence_em</link>		<description>&gt; **the sentence
embeddings from the pre-trained language
models without fine-tuning have been
found to poorly capture semantic meaning of
sentences.**
&gt;
&gt; We find that **BERT always induces
a non-smooth anisotropic semantic space of
sentences**, which harms its performance of
semantic similarity. To address this issue,
we propose to transform the anisotropic sentence
embedding distribution to a smooth and
isotropic Gaussian distribution through normalizing
flows that are learned with an unsupervised
objective

&gt; normalizing flows (Dinh et al., 2015): invertible function parameterized by neural networks.
&gt; **During
training, only the flow network is optimized
while the BERT parameters remain unchanged**

&gt; When combined with external supervision from
natural language inference tasks (Bowman et al.,
2015; Williams et al., 2018), our method outperforms
the [Sentence-BERT&#93;(tag:sbert) embeddings

[GitHub&#93;(https://github.com/bohanli/BERT-flow)
		</description>		<dc:date>2021-04-19T01:13:25Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/04/simcse_simple_contrastive_lear">		<title>SimCSE: Simple Contrastive Learning of Sentence Embeddings</title>		<link>http://www.semanlink.net/doc/2021/04/simcse_simple_contrastive_lear</link>		<description>(by one of the authors of [KEPLER&#93;(doc:2020/11/1911_06136_kepler_a_unified_))

a contrastive sentence
embedding framework, which can be used to produce
sentence embeddings, from either
unlabeled or labeled data.

&gt; 1. **an unsupervised approach,
which takes an input sentence and predicts
itself in a contrastive objective, with only
standard dropout** used as noise
&gt; 2. we draw inspiration
from the recent success of learning sentence
embeddings from natural language inference
(NLI) datasets and incorporate annotated
pairs from NLI datasets into contrastive
learning by using “entailment” pairs as positives
and “contradiction” pairs as hard negatives

Cites [[2011.05864&#93; On the Sentence Embeddings from Pre-trained Language Models&#93;(doc:2021/04/2011_05864_on_the_sentence_em) (question of the anisotropic semantic space of BERT&apos;s sentences)		</description>		<dc:date>2021-04-18T18:28:29Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/04/nils_reimers_sur_twitter_new">		<title>Nils Reimers sur Twitter : &quot;New models for Neural Information Retrieval...&quot;</title>		<link>http://www.semanlink.net/doc/2021/04/nils_reimers_sur_twitter_new</link>		<dc:date>2021-04-17T10:07:14Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/04/2007_12603_ir_bert_leveragin">		<title>[2007.12603&#93; IR-BERT: Leveraging BERT for Semantic Search in Background Linking for News Articles</title>		<link>http://www.semanlink.net/doc/2021/04/2007_12603_ir_bert_leveragin</link>		<dc:date>2021-04-12T18:27:34Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/04/2007_15779_domain_specific_la">		<title>[2007.15779&#93; Domain-Specific Language Model Pretraining for Biomedical Natural Language Processing</title>		<link>http://www.semanlink.net/doc/2021/04/2007_15779_domain_specific_la</link>		<description>&gt; A prevailing assumption is that even domain-specific pretraining can benefit by starting from general-domain language models. In this paper, we challenge this assumption by showing that **for domains with abundant unlabeled text, such as biomedicine, pretraining language models from scratch results in substantial gains over continual pretraining of general-domain language models**		</description>		<dc:date>2021-04-11T16:38:59Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/04/1902_00751_parameter_efficien">		<title>[1902.00751&#93; Parameter-Efficient Transfer Learning for NLP</title>		<link>http://www.semanlink.net/doc/2021/04/1902_00751_parameter_efficien</link>		<description>**Adapter tuning for NLP**.


A strategy for tuning a large text model on several
downstream tasks, that permits training on
tasks sequentially, and that adds only a small number
of additional parameters per task.

New modules added between layers of a
pre-trained network. Parameters of the original network are frozen
and therefore may be shared by many tasks.


[GitHub google-research/adapter-bert&#93;(https://github.com/google-research/adapter-bert)		</description>		<dc:date>2021-04-11T13:13:13Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/04/exbert_extending_pre_trained_m">		<title>exBERT: Extending Pre-trained Models with Domain-specific Vocabulary Under Constrained Training Resources - ACL Anthology</title>		<link>http://www.semanlink.net/doc/2021/04/exbert_extending_pre_trained_m</link>		<description>**Focus on the Embedding of Domain-specific Vocabulary.**

&gt; exBERT
adds a new domain-specific vocabulary and the corresponding
embedding layer, as well as a small
extension module to the original unmodified model

&gt; a pretraining
method allowing **low-cost embedding of
domain-specific vocabulary in the context of an
existing large pre-trained model such as BERT**

&gt; exBERT... explicitly incorporates
the new domain’s vocabulary, while being able to
**reuse the original pre-trained model’s weights as is**
to reduce required computation and training data. Specifically, exBERT extends BERT by augmenting
its embeddings for the original vocabulary with
new embeddings for the domain-specific vocabulary
via **a learned small “extension” module**. **The
output of the original and extension modules are
combined via a trainable weighted sum operation**

In a way similar to concept developed in

&gt; [[1902.00751&#93; Parameter-Efficient Transfer Learning for NLP&#93;(doc:2021/04/1902_00751_parameter_efficien), but not in the fine-tuning paradigm.

[Github&#93;(https://github.com/cgmhaicenter/exBERT)		</description>		<dc:date>2021-04-11T10:13:43Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/04/1910_02227_making_sense_of_se">		<title>[1910.02227&#93; Making sense of sensory input</title>		<link>http://www.semanlink.net/doc/2021/04/1910_02227_making_sense_of_se</link>		<description>&gt; what does it mean to “make sense”
of a sensory sequence? Our answer is that making sense means constructing a symbolic theory containing a set
of objects that persist over time, with attributes that change over time, according to general laws. This theory
must both explain the sensory input, and satisfy unity conditions [the
constituents of our theory – objects, properties, and atoms – must be integrated into a coherent whole&#93;

Sequel: [Making sense of raw input&#93;(doc:2021/05/making_sense_of_raw_input)		</description>		<dc:date>2021-04-10T19:09:06Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/03/1901_04085_passage_re_ranking">		<title>[1901.04085&#93; Passage Re-ranking with BERT</title>		<link>http://www.semanlink.net/doc/2021/03/1901_04085_passage_re_ranking</link>		<description>a simple re-implementation of BERT for query-based passage re-ranking

[&quot;Slides of our WSDM 2021 tutorial &quot;Pretrained Transformers for Text Ranking: BERT and Beyond&quot;&#93;(doc:2021/03/rodrigo_nogueira_sur_twitter_)		</description>		<dc:date>2021-03-26T01:49:42Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/03/sentencetransformers_documentat">		<title>SentenceTransformers Documentation</title>		<link>http://www.semanlink.net/doc/2021/03/sentencetransformers_documentat</link>		<dc:date>2021-03-25T19:05:01Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/03/rodrigo_nogueira_sur_twitter_">		<title>Rodrigo Nogueira sur Twitter : &quot;Slides of our WSDM 2021 tutorial &quot;Pretrained Transformers for Text Ranking: BERT and Beyond&quot;</title>		<link>http://www.semanlink.net/doc/2021/03/rodrigo_nogueira_sur_twitter_</link>		<dc:date>2021-03-09T08:09:28Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/02/zero_shot_learning_in_modern_nl">		<title>Zero-Shot Learning in Modern NLP | Joe Davison Blog (2020-05)</title>		<link>http://www.semanlink.net/doc/2021/02/zero_shot_learning_in_modern_nl</link>		<description>&gt; state-of-the-art NLP
models for sequence classification without large annotated training
sets.

Simple idea: use a single model (eg. [Sentence-BERT&#93;(tag:sbert)) to embed both the text data and the class names into the same space. 

Pb: Sentence-BERT is designed to learn
effective sentence-level, not single- or multi-word representations like our
class names -&gt; the label
embeddings may not be as semantically salient as  word-level
embedding methods (i.e. word2vec).

Solution 1: Learn a projection from sentence level embeddings of words to word2vec embeddings, use it for encoding when learning classifier. Can be adapted to few-shot learning

Solution 2: &quot;Classification as [#Natural Language Inference&#93;(tag:nli)&quot;. 

&gt; A method which not only embeds
sequences and labels into the same latent space where their distance can
be measured, but that can actually tell us something about the compatibility
of two distinct sequences out of the box.		</description>		<dc:date>2021-02-23T13:44:34Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/02/kamalkraj_bert_ner_pytorch_nam">		<title>kamalkraj/BERT-NER: Pytorch-Named-Entity-Recognition-with-BERT</title>		<link>http://www.semanlink.net/doc/2021/02/kamalkraj_bert_ner_pytorch_nam</link>		<description>Use google BERT to do CoNLL-2003 NER !		</description>		<dc:date>2021-02-07T11:37:39Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2021/01/1911_03681_e_bert_efficient_">		<title>[1911.03681&#93; E-BERT: Efficient-Yet-Effective Entity Embeddings for BERT</title>		<link>http://www.semanlink.net/doc/2021/01/1911_03681_e_bert_efficient_</link>		<description>&gt; way of **injecting factual knowledge about entities into the pretrained BERT model**.

(Feeding entity vectors
into BERT as if they
were wordpiece vectors without additional encoder
pretraining)

&gt;
&gt; **We align [Wikipedia2Vec&#93;(tag:wikipedia2vec) entity vectors (Yamada et al., 2016) with BERT&apos;s native wordpiece vector space and use the aligned entity vectors as if they were wordpiece vectors**. The resulting entity-enhanced version of BERT (called E-BERT) is similar in spirit to [ERNIE&#93;(tag:ernie) (Zhang et al., 2019) and [KnowBert&#93;(tag:knowbert) (Peters et al., 2019), but it **requires no expensive further pretraining of the BERT encoder**.
&gt;
&gt; Our vector space alignment strategy is inspired by
cross-lingual word vector alignment

Related work on Entity-enhanced BERT:

&gt; ([ERNIE&#93;(doc:2019/08/_1905_07129_ernie_enhanced_la) and [Knowbert&#93;(doc:2020/05/1909_04164_knowledge_enhanced)) are based on the design principle
that BERT be adapted to entity vectors. They introduce
new encoder layers to feed pretrained entity
vectors into the Transformer, and they require additional
pretraining to integrate the new parameters.
In contrast, E-BERT’s design principle is that entity
vectors be adapted to BERT.
&gt;
&gt; Two other knowledge-enhanced MLMs are [KEPLER&#93;(doc:2020/11/1911_06136_kepler_a_unified_)
(Wang et al., 2019c) and K-Adapter (Wang
et al., 2020)... Their factual knowledge
does not stem from entity vectors – instead, they
are trained in a multi-task setting on relation classification
and knowledge base completion.

Not to be confused with [[2009.02835&#93; E-BERT: A Phrase and Product Knowledge Enhanced Language Model for E-commerce&#93;(doc:2020/12/2009_02835_e_bert_a_phrase_a)		</description>		<dc:date>2021-01-12T18:31:21Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/12/google_tapas_base_finetuned_wtq">		<title>google/tapas-base-finetuned-wtq · Hugging Face</title>		<link>http://www.semanlink.net/doc/2020/12/google_tapas_base_finetuned_wtq</link>		<description>&gt; a BERT-like transformers model pretrained on a large corpus of English data from Wikipedia in a self-supervised fashion		</description>		<dc:date>2020-12-17T22:40:56Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/12/2009_02835_e_bert_a_phrase_a">		<title>[2009.02835&#93; E-BERT: A Phrase and Product Knowledge Enhanced Language Model for E-commerce</title>		<link>http://www.semanlink.net/doc/2020/12/2009_02835_e_bert_a_phrase_a</link>		<description>E-BERT, pre-training framework for product data.

1. to benefit from phrase-level knowledge: Adaptive Hybrid Masking, a new masking strategy, which allows the model to adaptively switch from learning preliminary word knowledge to learning complex phrases
2. leveraging product-level knowledge: training E-BERT to
predict a product’s associated neighbors (product association)

Resources used:

- description of millions of products from the amazon dataset (title, description, reviews)
- e-commerce phrases: extracted from above dataset using [AutoPhrase&#93;(doc:2020/12/autophrase_automated_phrase_mi)
- product association graph: pairs of substitutable and complementary products extracted from amazon dataset

Not to be confounded with [[1911.03681&#93; E-BERT: Efficient-Yet-Effective Entity Embeddings for BERT&#93;(doc:2021/01/1911_03681_e_bert_efficient_)		</description>		<dc:date>2020-12-14T11:10:29Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/12/2002_08909_realm_retrieval_a">		<title>[2002.08909&#93; REALM: Retrieval-Augmented Language Model Pre-Training</title>		<link>http://www.semanlink.net/doc/2020/12/2002_08909_realm_retrieval_a</link>		<description>**Augment language model pre-training with a retriever module**, which
is trained using the masked language modeling objective.

&gt; To capture knowledge in a more modular and interpretable way, we augment language model pre-training with a latent knowledge retriever, which allows the model to retrieve and attend over documents from a large corpus such as Wikipedia, used during pre-training, fine-tuning and inference. **For the first time, we show how to pre-train such a knowledge retriever in an unsupervised manner**, using masked language modeling as the learning signal and backpropagating through a retrieval step that considers millions of documents

Hum, #TODO: parallel to be drawn with techniques in [KG-augmented Language Models&#93;(tag:knowledge_graph_augmented_language_models) which focus &quot;on the problem of capturing declarative knowledge in the learned parameters of a language model.&quot;

[Google AI Blog Post&#93;(doc:2020/08/google_ai_blog_realm_integrat)

[Summary&#93;(https://joeddav.github.io/blog/2020/03/03/REALM.html) for the [Hugging Face awesome-papers reading group&#93;(doc:2021/03/huggingface_awesome_papers_pap)		</description>		<dc:date>2020-12-12T02:30:25Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/12/supporting_content_decision_mak">		<title>Supporting content decision makers with machine learning | Dec, 2020 | Netflix TechBlog</title>		<link>http://www.semanlink.net/doc/2020/12/supporting_content_decision_mak</link>		<dc:date>2020-12-11T13:34:30Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/12/google_ai_blog_reformer_the_e">		<title>Google AI Blog: Reformer: The Efficient Transformer</title>		<link>http://www.semanlink.net/doc/2020/12/google_ai_blog_reformer_the_e</link>		<dc:date>2020-12-09T12:07:13Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/12/keyword_extraction_with_bert_%7C_">		<title>Keyword Extraction with BERT | Towards Data Science</title>		<link>http://www.semanlink.net/doc/2020/12/keyword_extraction_with_bert_%7C_</link>		<description>A minimal method for extracting keywords and keyphrases.

[GitHub&#93;(https://github.com/MaartenGr/KeyBERT/)

&gt; uses BERT-embeddings and simple cosine similarity to find the sub-phrases in a document that are the most similar to the document itself.		</description>		<dc:date>2020-12-06T10:07:17Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/12/salmon_run_word_sense_disambig">		<title>Salmon Run: Word Sense Disambiguation using BERT as a Language Model</title>		<link>http://www.semanlink.net/doc/2020/12/salmon_run_word_sense_disambig</link>		<dc:date>2020-12-01T15:45:06Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/12/domain_specific_bert_models_%C2%B7_c">		<title>Domain-Specific BERT Models · Chris McCormick</title>		<link>http://www.semanlink.net/doc/2020/12/domain_specific_bert_models_%C2%B7_c</link>		<description>Chances are you won’t be able to pre-train BERT on your own dataset, for the following reasons:

1. Pre-training BERT requires a huge corpus
2. Huge Model + Huge Corpus = Lots of GPUs 		</description>		<dc:date>2020-12-01T15:08:22Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/11/2010_01057_luke_deep_context">		<title>[2010.01057&#93; LUKE: Deep Contextualized Entity Representations with Entity-aware Self-attention</title>		<link>http://www.semanlink.net/doc/2020/11/2010_01057_luke_deep_context</link>		<description>&gt; LUKE is based on bidirectional Transformer, treats words and entities in a text as independent tokens, and outputs contextualized representations of them. The representations can be used to address downstream tasks similarly to BERT. [src&#93;(https://twitter.com/ikuyamada/status/1312947499141750786)

&gt; LUKE is trained using a novel pretraining task that involves predicting randomly masked words (equivalent to BERT’s masked language model) and entities in an entity-annotated corpus obtained from Wikipedia.

(Hum, ça me rappelle quelque chose)

&gt; LUKE also uses a new *entity-aware* self-attention mechanism that considers the types of tokens (words or entities) when computing attention scores.

[github&#93;(https://github.com/studio-ousia/luke), [at Hugging Face&#93;(https://twitter.com/AkariAsai/status/1389428550298525696), [doc&#93;(https://huggingface.co/transformers/model_doc/luke.html), [tweet&#93;(https://twitter.com/ikuyamada/status/1392742990586683392?s=20)		</description>		<dc:date>2020-11-26T16:21:30Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/11/raphaelsty_ckb_contextual_know">		<title>raphaelsty/ckb: Contextual knowledge bases</title>		<link>http://www.semanlink.net/doc/2020/11/raphaelsty_ckb_contextual_know</link>		<description>Une implémentation de [BLP&#93;(tag:blp) [[2010.03496&#93; Inductive Entity Representations from Text via Link Prediction&#93;(doc:2020/11/2010_03496_inductive_entity_r)		</description>		<dc:date>2020-11-09T16:10:42Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/11/2010_03496_inductive_entity_r">		<title>[2010.03496&#93; Inductive Entity Representations from Text via Link Prediction</title>		<link>http://www.semanlink.net/doc/2020/11/2010_03496_inductive_entity_r</link>		<description>BLP &quot;BERT for Link Prediction&quot;. Central idea: **training an entity encoder with a
link prediction objective** (using the textual descriptions of entities when computing entity representations - hence not failing with entities unknown in training)

&gt; a method for **learning representations
of entities**, that uses a **pre-trained Transformer** based
architecture as an entity encoder, and
**link prediction training on a knowledge graph
with textual entity descriptions**.

&gt; using entity descriptions,
an entity encoder is trained for link prediction in
a knowledge graph. The encoder can then be used
without fine-tuning to obtain features for entity classification
and information retrieval

Cites [Xie et al&#93;(doc:2020/10/representation_learning_of_know) and [Kepler&#93;(doc:2020/11/1911_06136_kepler_a_unified_). They claim that their
objective targeted exclusively for link prediction (and not an objective that combines language modeling
and link prediction as Kepler)
performs better than Kepler&apos;s more complex one.

[Github&#93;(https://github.com/dfdazac/blp)		</description>		<dc:date>2020-11-03T16:38:59Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/10/which_flavor_of_bert_should_you">		<title>Which flavor of BERT should you use for your QA task? | by Olesya Bondarenko | Towards Data Science</title>		<link>http://www.semanlink.net/doc/2020/10/which_flavor_of_bert_should_you</link>		<description>A guide to choosing and benchmarking BERT models for question answering		</description>		<dc:date>2020-10-04T23:31:57Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/10/2010_00402_from_trees_to_cont">		<title>[2010.00402&#93; From Trees to Continuous Embeddings and Back: Hyperbolic Hierarchical Clustering</title>		<link>http://www.semanlink.net/doc/2020/10/2010_00402_from_trees_to_cont</link>		<description>&gt; The key idea of our method, HypHC, is showing a direct correspondence from discrete trees to continuous representations (via the hyperbolic embeddings of their leaf nodes) and back (via a decoding algorithm that maps leaf embeddings to a dendrogram), **allowing us to search the space of discrete binary trees with continuous optimization**.

Cites [Dasgupta: A cost function for similarity-based hierarchical clustering&#93;(https://arxiv.org/abs/1510.05043)		</description>		<dc:date>2020-10-03T14:46:20Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/08/google_ai_blog_realm_integrat">		<title>Google AI Blog: REALM: Integrating Retrieval into Language Representation Models</title>		<link>http://www.semanlink.net/doc/2020/08/google_ai_blog_realm_integrat</link>		<description>&gt; a new open-source method for language model pre-training that uses a supplemental knowledge retriever that enables it to perform well on knowledge-intensive tasks without billions of parameters.
&gt;
&gt; **The key intuition of REALM is that a retrieval system should improve the model&apos;s ability to fill in missing words**

[Paper:  REALM: Retrieval-Augmented Language Model Pre-Training&#93;(doc:2020/12/2002_08909_realm_retrieval_a)		</description>		<dc:date>2020-08-13T10:09:38Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/07/ukplab_sentence_transformers_s">		<title>UKPLab/sentence-transformers: Sentence Embeddings with BERT &amp; XLNet</title>		<link>http://www.semanlink.net/doc/2020/07/ukplab_sentence_transformers_s</link>		<description>[paper&#93;(doc:2019/08/_1908_10084_sentence_bert_sen)		</description>		<dc:date>2020-07-14T19:08:40Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/07/how_to_use_bert_for_finding_sim">		<title>How to use BERT for finding similar sentences or similar news? · Issue #876 · huggingface/transformers</title>		<link>http://www.semanlink.net/doc/2020/07/how_to_use_bert_for_finding_sim</link>		<description>links to [UKPLab/sentence-transformers&#93;(doc:2020/07/ukplab_sentence_transformers_s)

[Another answer&#93;(https://github.com/huggingface/transformers/issues/2986)

		</description>		<dc:date>2020-07-12T15:26:41Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/07/2004_07202_entities_as_expert">		<title>[2004.07202&#93; Entities as Experts: Sparse Memory Access with Entity Supervision</title>		<link>http://www.semanlink.net/doc/2020/07/2004_07202_entities_as_expert</link>		<description>&gt;  We focus on the problem of **capturing declarative knowledge in the learned parameters of a language model**...

&gt; Entities as Experts (EaE) can access distinct memories of the entities mentioned in a piece of text;

&gt; To understand the motivation for distinct and
independent entity representations: A traditional Transformer would need to build an internal representation
of Charles Darwin from the words “Charles”
and “Darwin”... Conversely, EAE can access
a dedicated representation of “Charles Darwin”,
which is a memory of all of the contexts in which
this entity has previously been mentioned.... Having retrieved
and re-integrated this memory it is much easier for
EAE to relate the question to the answer

&gt; EaE&apos;s entity representations are learned directly from text. Correct identification, and representation, of entities is essential to EaE&apos;s performance

Based on transformer architecture

Extension: [Facts as Experts&#93;(doc:2020/07/2007_00849_facts_as_experts_)		</description>		<dc:date>2020-07-11T15:09:10Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/07/2007_00849_facts_as_experts_">		<title>[2007.00849&#93; Facts as Experts: Adaptable and Interpretable Neural Memory over Symbolic Knowledge</title>		<link>http://www.semanlink.net/doc/2020/07/2007_00849_facts_as_experts_</link>		<description>&gt; a neural language model that includes **an explicit interface between symbolically interpretable factual information and subsymbolic neural knowledge.**... **The model can be updated without re-training by manipulating its symbolic representations**. In particular this model allows us to add new facts and overwrite existing ones.

&gt; a **neural language model which learns to access information
in a symbolic knowledge graph.**

&gt; This
model builds on the recently-proposed [Entities as
Experts&#93;(doc:2020/07/2004_07202_entities_as_expert) (EaE) language model (Févry et al., 2020),
which extends the same transformer (Vaswani
et al., 2017) architecture of BERT (Devlin et al., 2019) with an additional external memory for entities.
&gt;
&gt; After training EaE, the embedding associated
with an entity will (ideally) capture information
about the textual context in which that
entity appears, and by inference, the entity’s semantic
properties
&gt;
&gt; we include an additional
memory called a fact memory, which encodes
triples from a symbolic KB.
&gt;
&gt; This combination results in a
neural language model which learns to access information
in a symbolic knowledge graph.



TODO: 

- read again IBM&apos;s [Span Selection Pre-training for Question Answering&#93;(doc:2019/09/_1909_04120_span_selection_pre) (&quot;an effort to avoid encoding general knowledge in the transformer network itself&quot;)
- compare with [[1907.05242&#93; Large Memory Layers with Product Keys&#93;(doc:2019/07/_1907_05242_large_memory_layer)
- how does it relate with [[2002.08909&#93; REALM: Retrieval-Augmented Language Model Pre-Training&#93;(doc:2020/12/2002_08909_realm_retrieval_a)?		</description>		<dc:date>2020-07-09T23:54:59Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/07/bert_word_embeddings_tutorial_%C2%B7">		<title>BERT Word Embeddings Tutorial · Chris McCormick</title>		<link>http://www.semanlink.net/doc/2020/07/bert_word_embeddings_tutorial_%C2%B7</link>		<dc:date>2020-07-06T14:51:33Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/07/learning_to_tag_oov_tokens_by_i">		<title>Learning to Tag OOV Tokens by Integrating Contextual Representation and Background Knowledge (ACL Anthology 2020)</title>		<link>http://www.semanlink.net/doc/2020/07/learning_to_tag_oov_tokens_by_i</link>		<description>Aim to leverage both contextual representation of input text (deep LMs)  and  knowledge derived
from curated KBs ([Wordnet&#93;(tag:wordnet)) to improve [slot tagging&#93;(tag:slot_tagging) in the presence of [out-of-vocab&#93;(tag:oov) words ([few-shot scenario&#93;(tag:few_shot_learning))

Method:

1. retrieve potentially relevant KB entities and
encode them into distributed representations that
describe global graph-structured information
2. BERT encoder
layer to capture context-aware representations of
the sequence and attend to the KB embeddings
using multi-level graph attention
3. integrate
BERT embeddings and the KB embeddings
to predict the slot type

Contributions:

1. feasibility of applying lexical ontology
to facilitate recognizing OOV words. First to consider the large-scale background
knowledge for enhancing context-aware
slot tagging models.
2. a knowledge integration mechanism that uses multi-level graph
attention to model explicit lexical relations.
3. experiments on two benchmark datasets

&gt; our method makes a notable difference in a
scenario where samples are linguistically diverse,
and large vocab exists.

(Better improvements when using RNN than BERT, because BERT already contains a lot of background knowledge)		</description>		<dc:date>2020-07-04T11:34:35Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/06/patrick_von_platen_sur_twitter_">		<title>Patrick von Platen sur Twitter : &quot;Today, @huggingface is the start of our Reformer series...&quot;</title>		<link>http://www.semanlink.net/doc/2020/06/patrick_von_platen_sur_twitter_</link>		<dc:date>2020-06-29T19:07:30Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/06/2001_04451_reformer_the_effi">		<title>[2001.04451&#93; Reformer: The Efficient Transformer</title>		<link>http://www.semanlink.net/doc/2020/06/2001_04451_reformer_the_effi</link>		<dc:date>2020-06-29T19:04:03Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/06/representation_learning_for_inf">		<title>Representation Learning for Information Extraction from Form-like Documents – Google Research</title>		<link>http://www.semanlink.net/doc/2020/06/representation_learning_for_inf</link>		<description>&gt; a novel approach using representation learning for tackling the problem of **extracting structured information from form-like document images**. We propose an **extraction system that uses knowledge of the types of the target fields to generate extraction candidates**, and a neural network architecture that learns a dense representation of each candidate based on neighboring words in the document.

[Blog post&#93;(doc:2020/06/google_ai_blog_extracting_stru)		</description>		<dc:date>2020-06-15T22:58:48Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/06/google_ai_blog_extracting_stru">		<title>Google AI Blog: Extracting Structured Data from Templatic Documents (2020)</title>		<link>http://www.semanlink.net/doc/2020/06/google_ai_blog_extracting_stru</link>		<description>[About this paper&#93;(doc:2020/06/representation_learning_for_inf)

Templatic documents (eg. invoices): such documents do not contain “natural
language” but
instead resemble forms, with data often presented in tables

&gt; an approach that **uses knowledge of target field types to identify
candidate fields**. These are then scored using **a neural network that
learns a dense representation of each candidate using the words in its
neighborhood**. Experiments on two corpora (invoices and receipts) show
that we’re able to generalize well to unseen layouts.
&gt;
&gt; An understanding of the **two-dimensional layout of text**
on the page is key to understanding such documents. On the other hand,
treating this purely as an image segmentation problem makes it difficult
to take advantage of the semantics of the text.
&gt;
&gt; Our approach to this problem allows developers to train and deploy an
extraction system for a given domain (like invoices) using **two inputs — a
target schema (i.e., a list of fields to extract and their corresponding
types) and a small collection of documents labeled with the ground truth
for use as a training set**

- The input document is first run through an [OCR service&#93;(doc:2020/06/detecter_le_texte_dans_les_fich).
- a candidate generator identifies spans of text in the OCR output that might correspond to
an instance of a given field (uses pre-existing
libraries associated with each field type)
- Each candidate is then scored using a neural
network (that is trained as a binary classifier)		</description>		<dc:date>2020-06-15T22:51:23Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/06/1804_03235_large_scale_distri">		<title>[1804.03235&#93; Large scale distributed neural network training through online distillation</title>		<link>http://www.semanlink.net/doc/2020/06/1804_03235_large_scale_distri</link>		<description>&gt;  we use *codistillation* to refer to distillation performed:
&gt; 1. using the same architecture for all the models;
&gt; 2. using the same dataset to train all the models; and
&gt; 3. using the distillation loss during training before any model has fully converged.

&gt; In general, we believe the quality gains of codistillation over well-tuned offline distillation will be
minor in practice and the more interesting research direction is exploring codistillation as a distributed
training algorithm

&gt; Codistillation with
the same data seems to be slightly better than the baseline, but codistillation using different data
gets much better results. These results show that the codistilling models are indeed successfully
transmitting useful information about different parts of the training data to each other.

Related to [&quot;Deep mutual learning&quot;&#93;(doc:2020/05/1706_00384_deep_mutual_learni) paper		</description>		<dc:date>2020-06-06T16:51:26Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/06/on_word_embeddings">		<title>On word embeddings</title>		<link>http://www.semanlink.net/doc/2020/06/on_word_embeddings</link>		<description>History of word embeddings in the context of language
modelling. [Next post in series&#93;(doc:2020/06/approximating_the_softmax_for_l)		</description>		<dc:date>2020-06-05T01:31:14Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/05/1909_04164_knowledge_enhanced">		<title>[1909.04164&#93; Knowledge Enhanced Contextual Word Representations</title>		<link>http://www.semanlink.net/doc/2020/05/1909_04164_knowledge_enhanced</link>		<description>General method to **embed multiple knowledge bases into pre-trained language models** (KB in the 
sense of a fixed collection of entity nodes)

&gt; The key idea is to explicitly model
entity spans in the input text and use an **entity
linker** to retrieve relevant entity embeddings from
a KB to form knowledge enhanced entity-span
representations.
&gt; Then,  update contextual word representations via a form of **word-to-entity attention**. 
&gt; In contrast to previous approaches, the entity linkers and self-supervised language modeling objective are jointly trained end-to-end in a multitask setting that **combines a small amount of entity linking supervision with a large amount of raw text**.		</description>		<dc:date>2020-05-13T01:44:51Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/05/phd_thesis_deep_learning_with_">		<title>Thomas Kipf&apos;s PhD thesis: &quot;Deep Learning with Graph-Structured Representations&quot;</title>		<link>http://www.semanlink.net/doc/2020/05/phd_thesis_deep_learning_with_</link>		<description>Covers a range of emerging topics in Deep Learning: from graph neural nets (and graph convolutions) to structure discovery (objects, relations, events)		</description>		<dc:date>2020-05-05T15:47:55Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/05/1911_03814_zero_shot_entity_l">		<title>[1911.03814&#93; Scalable Zero-shot Entity Linking with Dense Entity Retrieval</title>		<link>http://www.semanlink.net/doc/2020/05/1911_03814_zero_shot_entity_l</link>		<description>&gt; a two stage approach, based on fine-tuned BERT architectures. In the first stage, we do retrieval
in a dense space defined by a bi-encoder that
independently embeds the mention context and the
entity descriptions (Humeau et al., 2019; Gillick
et al., 2019). Each retrieved candidate is then examined
more carefully with a cross-encoder that
concatenates the mention and entity text,		</description>		<dc:date>2020-05-02T11:43:47Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/03/bert_elmo_gpt_2_how_contex">		<title>BERT, ELMo, &amp; GPT-2: How Contextual are Contextualized Word Representations? | SAIL Blog</title>		<link>http://www.semanlink.net/doc/2020/03/bert_elmo_gpt_2_how_contex</link>		<dc:date>2020-03-28T10:33:17Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/03/_1909_03193_kg_bert_bert_for_">		<title>[1909.03193&#93; KG-BERT: BERT for Knowledge Graph Completion</title>		<link>http://www.semanlink.net/doc/2020/03/_1909_03193_kg_bert_bert_for_</link>		<description>Pre-trained language models for knowledge graph completion. **Triples are treated as textual sequences**. (Hum, j&apos;ai déjà vu ça quelque part. Ah, peut-être [RDF2VEC&#93;(tag:rdf2vec)? // TODO à voir)

Takes entity and relation descriptions of a triple as input and computes scoring function of the triple with the KG-BERT language model

&gt; we first treat entities, relations and triples as
textual sequences and turn knowledge graph completion into
a sequence classification problem. We then fine-tune BERT
model on these sequences for predicting the plausibility of
a triple or a relation. The method

[GitHub&#93;(https://github.com/yao8839836/kg-bert)		</description>		<dc:date>2020-03-22T18:56:43Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/03/_1909_07606_k_bert_enabling_l">		<title>[1909.07606&#93; K-BERT: Enabling Language Representation with Knowledge Graph</title>		<link>http://www.semanlink.net/doc/2020/03/_1909_07606_k_bert_enabling_l</link>		<description>a knowledge-enabled language representation model (K-BERT) with knowledge graphs (KGs), in which triples are injected into the sentences as domain knowledge

(Summarized in [Domain adaptation of word embeddings through the exploitation of in-domain corpora and knowledge bases (PhD Thesis 2021)&#93;(doc:2022/03/domain_adaptation_of_word_embed), p43)		</description>		<dc:date>2020-03-08T22:54:15Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/03/unsupervised_ner_using_bert_h">		<title>Unsupervised NER using BERT - Hands-on NLP model review - Quora</title>		<link>http://www.semanlink.net/doc/2020/03/unsupervised_ner_using_bert_h</link>		<description>[GitHub&#93;(https://github.com/ajitrajasekharan/unsupervised_NER)		</description>		<dc:date>2020-03-06T00:12:06Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/02/_2002_12327_a_primer_in_bertol">		<title>[2002.12327&#93; A Primer in BERTology: What we know about how BERT works</title>		<link>http://www.semanlink.net/doc/2020/02/_2002_12327_a_primer_in_bertol</link>		<description>(article praised on [twitter&#93;(https://twitter.com/dennybritz/status/1233343170596917248?s=20) by D Britz and Y. 
Goldberg)		</description>		<dc:date>2020-02-28T13:25:30Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/02/_2002_11402_detecting_potentia">		<title>[2002.11402&#93; Detecting Potential Topics In News Using BERT, CRF and Wikipedia</title>		<link>http://www.semanlink.net/doc/2020/02/_2002_11402_detecting_potentia</link>		<dc:date>2020-02-27T23:36:54Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/02/distilling_bert_models_with_spa">		<title>Distilling BERT models with spaCy - Towards Data Science (2019)</title>		<link>http://www.semanlink.net/doc/2020/02/distilling_bert_models_with_spa</link>		<dc:date>2020-02-15T11:15:11Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/02/hugging_face_sur_twitter_to_">		<title>Hugging Face sur Twitter :  DistilBERT-cased for Question Answering w/ just 3 lines of javascript</title>		<link>http://www.semanlink.net/doc/2020/02/hugging_face_sur_twitter_to_</link>		<dc:date>2020-02-14T00:23:36Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/02/how_much_knowledge_can_you_pack">		<title>How Much Knowledge Can You Pack Into the Parameters of a Language Model?</title>		<link>http://www.semanlink.net/doc/2020/02/how_much_knowledge_can_you_pack</link>		<description>&gt; It has recently been observed that neural language
models trained on unstructured text can
implicitly store and retrieve knowledge using
natural language queries.

indeed, cf. Facebook&apos;s paper [Language Models as Knowledge Bases?&#93;(/doc/2019/09/_1909_01066_language_models_as)

&gt; In this short paper,
we measure the practical utility of this
approach by fine-tuning pre-trained models to
answer questions without access to any external
context or knowledge.


&gt; we show that a large language
model pre-trained on unstructured text can
attain competitive results on open-domain question
answering benchmarks without any access
to external knowledge

BUT:

&gt;1. state-of-the-art results only with the largest model
which had 11 billion parameters.
&gt;1. “open-book” models
typically provide some indication of what information
they accessed when answering a question
that provides a useful form of interpretability.
In contrast, our model distributes knowledge
in its parameters in an inexplicable way, which
precludes this form of interpretability.
&gt;1. **the maximum-likelihood objective provides no guarantees as to whether
a model will learn a fact or not.**

So, what&apos;s the point? To be compared with this [IBM&apos;s paper&#93;(/doc/2019/09/_1909_04120_span_selection_pre): &quot;a new pre-training task inspired by reading comprehension and an effort to avoid encoding general knowledge in the transformer network itself&quot;		</description>		<dc:date>2020-02-11T22:56:31Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/02/adam_roberts_sur_twitter_new">		<title>Adam Roberts sur Twitter : &quot;New preprint: How Much Knowledge Can You Pack into the Parameters of a Language Model?...&quot;</title>		<link>http://www.semanlink.net/doc/2020/02/adam_roberts_sur_twitter_new</link>		<description>[paper&#93;(/doc/2020/02/how_much_knowledge_can_you_pack)		</description>		<dc:date>2020-02-11T12:24:21Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/02/_1911_05507_compressive_transf">		<title>[1911.05507&#93; Compressive Transformers for Long-Range Sequence Modelling</title>		<link>http://www.semanlink.net/doc/2020/02/_1911_05507_compressive_transf</link>		<description>&gt; the Compressive Transformer, an attentive sequence model which compresses past memories for long-range sequence learning.

[Blog post&#93;(/doc/2020/02/a_new_model_and_dataset_for_lon)		</description>		<dc:date>2020-02-11T08:48:20Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/02/a_new_model_and_dataset_for_lon">		<title>A new model and dataset for long-range memory | DeepMind</title>		<link>http://www.semanlink.net/doc/2020/02/a_new_model_and_dataset_for_lon</link>		<description>the use of memory in deep learning, and how modelling language may be an ideal task for developing better memory architectures

[paper&#93;(/doc/2020/02/_1911_05507_compressive_transf)		</description>		<dc:date>2020-02-11T08:40:48Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/02/_2002_02925_bert_of_theseus_c">		<title>[2002.02925&#93; BERT-of-Theseus: Compressing BERT by Progressive Module Replacing</title>		<link>http://www.semanlink.net/doc/2020/02/_2002_02925_bert_of_theseus_c</link>		<description>approach to compress BERT by progressive module replacing.

&gt; Compared to the previous knowledge distillation approaches for BERT compression, our approach leverages only one loss function and one hyper-parameter

[Github&#93;(https://github.com/JetRunner/BERT-of-Theseus)		</description>		<dc:date>2020-02-10T21:50:03Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/02/canwen_xu_sur_twitter_wtf_w">		<title>Canwen Xu sur Twitter : &quot;WTF? We brutally dismember BERT and replace all his organs?&quot;</title>		<link>http://www.semanlink.net/doc/2020/02/canwen_xu_sur_twitter_wtf_w</link>		<description>[paper&#93;(/doc/2020/02/_2002_02925_bert_of_theseus_c)		</description>		<dc:date>2020-02-10T09:21:44Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/01/_1503_03832_facenet_a_unified">		<title>[1503.03832&#93; FaceNet: A Unified Embedding for Face Recognition and Clustering</title>		<link>http://www.semanlink.net/doc/2020/01/_1503_03832_facenet_a_unified</link>		<description>Learns a Euclidean embedding per image

&gt; Uses a deep CNN trained to directly optimize the embedding itself, rather than an intermediate bottleneck layer as in previous deep learning approaches. To train, we use triplets of roughly aligned matching / non-matching face patches generated using a novel online triplet mining method.

&gt; state-of-the-art face recognition performance using only **128-bytes per face**. 

		</description>		<dc:date>2020-01-25T01:03:31Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/01/paris_nlp_season_4_meetup_3_">		<title>Paris NLP Season 4 Meetup #3 – Paris NLP (2020)</title>		<link>http://www.semanlink.net/doc/2020/01/paris_nlp_season_4_meetup_3_</link>		<description>- Siamese CNN for jobs-candidate matching: learning document embeddings with triplet loss.
- Sesame street-based naming schemes must fade out, long live CamemBERT et le French fromage!		</description>		<dc:date>2020-01-23T22:26:20Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/01/semantic_text_matching_for_long">		<title>Semantic Text Matching for Long-Form Documents (2019)</title>		<link>http://www.semanlink.net/doc/2020/01/semantic_text_matching_for_long</link>		<description>**A document can be represented as a hierarchy
of paragraph, sentence and word sequences.** Different paragraphs
and sentences can have different semantic meaning
and importance.

A multi-depth attention-based hierarchical RNN derives representations for each level of document
structure, which are then aggregated to build a representation of the entire document

Uses a Siamese structure for semantic text matching.		</description>		<dc:date>2020-01-23T10:21:17Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/01/building_a_search_engine_with_b">		<title>Building a Search Engine with BERT and TensorFlow - Towards Data Science</title>		<link>http://www.semanlink.net/doc/2020/01/building_a_search_engine_with_b</link>		<description>[somewhat related&#93;(/doc/2020/01/elasticsearch_meets_bert_build)		</description>		<dc:date>2020-01-12T17:13:45Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/01/elasticsearch_meets_bert_build">		<title>Elasticsearch meets BERT: Building Search Engine with Elasticsearch and BERT</title>		<link>http://www.semanlink.net/doc/2020/01/elasticsearch_meets_bert_build</link>		<description>- Links to [this ES blog post&#93;(/doc/2020/01/text_similarity_search_in_elast)
- [somewhat related&#93;(/doc/2020/01/building_a_search_engine_with_b)		</description>		<dc:date>2020-01-10T17:23:50Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/01/nlp_s_clever_hans_moment_has_ar">		<title>NLP&apos;s Clever Hans Moment has Arrived</title>		<link>http://www.semanlink.net/doc/2020/01/nlp_s_clever_hans_moment_has_ar</link>		<description>Do neural networks learn what we think they learn? @benbenhh reviews research that suggests that they often instead fall prey to the so-called Clever Hans effect and discusses its implications for NLP.		</description>		<dc:date>2020-01-10T16:33:27Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/01/investigating_entity_knowledge_">		<title>[2003.05473&#93; Investigating Entity Knowledge in BERT with Simple Neural End-To-End Entity Linking (CoNLL 2019)</title>		<link>http://www.semanlink.net/doc/2020/01/investigating_entity_knowledge_</link>		<description>Training BERT-base-uncased on English Wikipedia and then fine-tuning and evaluating it
on an entity linking (EL) benchmark (EL implemented as a token classification over the entity vocabulary)

&gt; BERT+Entity is a straightforward extension on top
of BERT, i.e. we initialize BERT with the publicly
available weights from the BERT-base-uncased
model and add an output classification layer on
top of the architecture. Given a contextualized token,
the classifier computes the probability of an
entity link for each entry in the entity vocabulary.

Can BERT’s architecture learn all entity
linking steps jointly? To answer:

&gt; an extreme
simplification of the **entity linking setup that
works surprisingly well**: simply cast it as **a
per token classification over the entire entity
vocabulary** (over 700K classes in our case).

&gt; the model
is the first that performs entity linking without any
pipeline or any heuristics, compared to all prior
approaches. We found that with our approach we
can learn additional entity knowledge in BERT that
helps in entity linking. **However, we also found
that almost none of the downstream tasks really
required entity knowledge**.

### Related work 

- &gt; [Durrett and Klein (2014)&#93;(/doc/2020/01/a_joint_model_for_entity_analys) were the first to propose
jointly modelling Mention detection, Candidate generation and Entity disambiguation in a graphical
model and could show that each of those steps are
interdependent and benefit from a joint objective

This paper uses neural techniques instead of CRF.

- &gt; [Yamada&#93;(/showprop.do?pptyuri=http%3A%2F%2Fwww.semanlink.net%2F2001%2F00%2Fsemanlink-schema%23arxiv_author&amp;pptyval=Ikuya%2BYamada) (2016, 2017) was the first to
investigate neural text representations and entity
linking, but their approach is limited to ED.

cf. [#Wikipedia2Vec&#93;(tag:wikipedia2vec). Compare with [newer work by Yamada&#93;(doc:2020/09/1909_01259_neural_attentive_b)		</description>		<dc:date>2020-01-09T10:36:17Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/01/named_entity_recognition_with_b">		<title>Named Entity Recognition with Bert – Depends on the definition</title>		<link>http://www.semanlink.net/doc/2020/01/named_entity_recognition_with_b</link>		<description>&gt; how you can finetune the Bert model to do state-of-the art named entity recognition

Same author: [NER with Lime&#93;(/doc/2020/01/interpretable_named_entity_reco)		</description>		<dc:date>2020-01-09T02:01:52Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/01/_1902_10909_bert_for_joint_int">		<title>[1902.10909&#93; BERT for Joint Intent Classification and Slot Filling</title>		<link>http://www.semanlink.net/doc/2020/01/_1902_10909_bert_for_joint_int</link>		<description>&gt; Experimental results show that our
proposed joint BERT model outperforms BERT
models modeling intent classification and slot filling
separately, demonstrating the efficacy of exploiting
the relationship between the two tasks.

Adding a CRF on top of the model doesn&apos;t improve the results.		</description>		<dc:date>2020-01-09T01:13:39Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/01/richer_sentence_embeddings_usin">		<title>Richer Sentence Embeddings using Sentence-BERT — Part I</title>		<link>http://www.semanlink.net/doc/2020/01/richer_sentence_embeddings_usin</link>		<description>Simplistic (and often used) methods for sentence embeddings with BERT are too simplistic to be good (averaging the word vectors, or using the \[CLS\&#93; special vector (start of sequence)).

[About this paper&#93;(/doc/2019/08/_1908_10084_sentence_bert_sen)		</description>		<dc:date>2020-01-06T01:48:12Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2020/01/lecture_14_contextual_vectors">		<title>Lecture 14 – Contextual Vectors | Stanford CS224U: Natural Language Understanding | Spring 2019</title>		<link>http://www.semanlink.net/doc/2020/01/lecture_14_contextual_vectors</link>		<dc:date>2020-01-05T18:17:47Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/11/artificial_human_intelligence_">		<title>Artificial Human Intelligence: The Programmer’s Apprentice - Tom Dean and Rishabh Singh - Google Research</title>		<link>http://www.semanlink.net/doc/2019/11/artificial_human_intelligence_</link>		<description>&gt; Our primary objective is to
build an end-to-end system for an individualized personal assistant that
focuses on a specific area of expertise, namely software engineering, that
learns from experience, works collaboratively with an expert programmer and
that provides value from day one.

&gt; Our **goal in developing systems that incorporate
characteristics of human intelligence** is two fold:
humans provide a complete solution that we can
use as a basic blueprint and then improve upon,
and **the resulting AI systems are likely to be well
suited to developing assistants** that complement
and extend human intelligence while **operating in
a manner comprehensible to our understanding**		</description>		<dc:date>2019-11-16T20:16:43Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/11/_1807_00082_amanuensis_the_pr">		<title>[1807.00082&#93; Amanuensis: The Programmer&apos;s Apprentice</title>		<link>http://www.semanlink.net/doc/2019/11/_1807_00082_amanuensis_the_pr</link>		<description>**The use of natural language to facilitate communication
between the expert programmer and apprentice AI system.**

&gt; an overview of the material covered in a course taught at Stanford in the spring quarter of 2018. The course draws upon **insight from cognitive and systems neuroscience to implement hybrid connectionist and symbolic reasoning systems** that leverage and extend the state of the art in machine learning **by integrating human and machine intelligence**. As a concrete example we focus on digital assistants that learn from continuous dialog with an expert software engineer while providing initial value as powerful analytical, computational and mathematical savants.

&gt; [#Dehaene&#93;(/tag/stanislas_dehaene)&apos;s work extends the [#Global Workspace Theory&#93;(/tag/global_workspace_theory) of Bernard Baars. Dehaene’s version of the theory combined with Yoshua Bengio’s concept of a [#consciousness prior&#93;(/tag/consciousness_prior.html) and deep reinforcement learning suggest a model for constructing and maintaining the cognitive states that arise and persist during complex problem solving.		</description>		<dc:date>2019-11-12T16:25:10Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/11/camembert">		<title>CamemBERT</title>		<link>http://www.semanlink.net/doc/2019/11/camembert</link>		<description>language model for French based on the RoBERTa architecture pretrained on the French subcorpus of the OSCAR multilingual corpus		</description>		<dc:date>2019-11-10T18:08:18Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/11/_1911_01464_emerging_cross_lin">		<title>[1911.01464&#93; Emerging Cross-lingual Structure in Pretrained Language Models</title>		<link>http://www.semanlink.net/doc/2019/11/_1911_01464_emerging_cross_lin</link>		<dc:date>2019-11-06T13:09:03Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/10/bert_is_now_part_of_google_sear">		<title>BERT is now part of Google Search, so let’s understand how it reasons</title>		<link>http://www.semanlink.net/doc/2019/10/bert_is_now_part_of_google_sear</link>		<dc:date>2019-10-31T08:28:40Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/10/restoring_ancient_text_using_de">		<title>Restoring ancient text using deep learning: a case study on Greek epigraphy | DeepMind</title>		<link>http://www.semanlink.net/doc/2019/10/restoring_ancient_text_using_de</link>		<dc:date>2019-10-18T00:50:20Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/10/language_and_perception_in_deep">		<title>Language and Perception in Deep Learning - Florian Strub DeepMind, 
Univ. Lille, Inria</title>		<link>http://www.semanlink.net/doc/2019/10/language_and_perception_in_deep</link>		<description>A [Related paper&#93;(/doc/2019/10/feature_wise_transformations)		</description>		<dc:date>2019-10-07T23:08:40Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/10/meet_albert_a_new_%E2%80%98lite_bert%E2%80%99_">		<title>Meet ALBERT: a new ‘Lite BERT’ from Google &amp; Toyota with State of the Art NLP performance and 18x fewer parameters.</title>		<link>http://www.semanlink.net/doc/2019/10/meet_albert_a_new_%E2%80%98lite_bert%E2%80%99_</link>		<dc:date>2019-10-01T15:21:13Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/09/evolution_of_representations_in">		<title>Evolution of Representations in the Transformer (2019)</title>		<link>http://www.semanlink.net/doc/2019/09/evolution_of_representations_in</link>		<description>Blog post about [this paper&#93;(/doc/2019/09/_1909_01380_the_bottom_up_evol)		</description>		<dc:date>2019-09-16T22:02:56Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/09/introducing_neural_structured_l">		<title>Introducing Neural Structured Learning in TensorFlow</title>		<link>http://www.semanlink.net/doc/2019/09/introducing_neural_structured_l</link>		<description>Neural Structured Learning (NSL) is an open source framework for training deep neural networks with structured signals. It implements Neural Graph Learning, which enables developers to train neural networks using graphs.		
</description>		<dc:date>2019-09-03T19:01:32Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/08/smaller_faster_cheaper_light">		<title>Smaller, faster, cheaper, lighter: Introducing DistilBERT, a distilled version of BERT</title>		<link>http://www.semanlink.net/doc/2019/08/smaller_faster_cheaper_light</link>		<dc:date>2019-08-28T22:47:20Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/08/_1908_10084_sentence_bert_sen">		<title>[1908.10084&#93; Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks</title>		<link>http://www.semanlink.net/doc/2019/08/_1908_10084_sentence_bert_sen</link>		<description>&gt; Sentence-BERT
(SBERT), a modification of the pretrained
BERT network that use siamese and triplet network
structures to derive **semantically meaningful
sentence embeddings** that can be compared
using cosine-similarity.

Important because 

- BERT is unsuitable for semantic similarity
search as well as for unsupervised tasks
like clustering.
- simple methods such as using the CLS token give low quality sentence embeddings

However, the purpose of SBERT sentence embeddings
are **not to be used for transfer learning for other
tasks**.

[Related blog post&#93;(/doc/2020/01/richer_sentence_embeddings_usin); [Github&#93;(https://github.com/UKPLab/sentence-transformers)		</description>		<dc:date>2019-08-28T22:41:55Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/08/_1808_02590_a_tutorial_on_netw">		<title>[1808.02590&#93; A Tutorial on Network Embeddings</title>		<link>http://www.semanlink.net/doc/2019/08/_1808_02590_a_tutorial_on_netw</link>		<dc:date>2019-08-25T02:02:16Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/08/watch_your_step_learning_node_">		<title>Watch Your Step: Learning Node Embeddings via Graph Attention</title>		<link>http://www.semanlink.net/doc/2019/08/watch_your_step_learning_node_</link>		<dc:date>2019-08-23T00:32:38Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/08/_1905_07129_ernie_enhanced_la">		<title>[1905.07129&#93; ERNIE: Enhanced Language Representation with Informative Entities</title>		<link>http://www.semanlink.net/doc/2019/08/_1905_07129_ernie_enhanced_la</link>		<description>&gt; We argue that informative entities in **KGs can enhance language representation with external knowledge**. In this paper, we utilize both large-scale textual corpora and KGs to train an enhanced language representation model (ERNIE), which can take full advantage of lexical, syntactic, and knowledge information simultaneously.

&gt; ERNIE achieves significant improvements on
various knowledge-driven tasks, and meanwhile
is comparable with the state-of-the-art
model BERT on other common NLP tasks

[GitHub&#93;(https://github.com/thunlp/ERNIE)

WARNING, there is another ERNIE (by [NLP@Baidu&#93;(tag:nlp_baidu)): Yu Sun, Shuohuan Wang, Yukun Li, Shikun Feng, Xuyi
Chen, Han Zhang, Xin Tian, Danxiang Zhu, Hao Tian, and
Hua Wu. 2019. Ernie: Enhanced representation through
knowledge integration. This doesn&apos;t happen when you choose François-Paul as the name for your child.		</description>		<dc:date>2019-08-05T15:40:17Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/08/what_is_xlnet_and_why_it_outper">		<title>What is XLNet and why it outperforms BERT - Towards Data Science</title>		<link>http://www.semanlink.net/doc/2019/08/what_is_xlnet_and_why_it_outper</link>		<dc:date>2019-08-02T17:46:14Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/07/a2n_attending_to_neighbors_for">		<title>A2N: Attending to Neighbors for Knowledge Graph Inference - ACL 2019</title>		<link>http://www.semanlink.net/doc/2019/07/a2n_attending_to_neighbors_for</link>		<description>&gt; State-of-the-art models for knowledge graph completion aim at learning a fixed embedding representation of entities in a multi-relational graph which can generalize to infer unseen entity relationships at test time. This can be sub-optimal as it requires memorizing and generalizing to all possible entity relationships using these fixed representations. We thus propose a novel **attention-based method to learn query-dependent representation of entities** which adaptively combines the relevant graph neighborhood of an entity leading to more accurate KG completion.		</description>		<dc:date>2019-07-31T19:37:20Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/07/bert_s_success_in_some_benchmar">		<title>BERT&apos;s success in some benchmarks tests may be simply due to the exploitation of spurious statistical cues in the dataset. Without them it is no better then random. 
: MachineLearning</title>		<link>http://www.semanlink.net/doc/2019/07/bert_s_success_in_some_benchmar</link>		<dc:date>2019-07-24T01:35:24Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/07/_1907_07355_probing_neural_net">		<title>[1907.07355&#93; Probing Neural Network Comprehension of Natural Language Arguments</title>		<link>http://www.semanlink.net/doc/2019/07/_1907_07355_probing_neural_net</link>		<description>what has BERT learned about argument comprehension?

[Comments&#93;(/doc/2019/07/bert_s_success_in_some_benchmar)		</description>		<dc:date>2019-07-24T01:34:54Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/06/google_ai_blog_harnessing_orga">		<title>Google AI Blog: Harnessing Organizational Knowledge for Machine Learning (2019)</title>		<link>http://www.semanlink.net/doc/2019/06/google_ai_blog_harnessing_orga</link>		<description>how existing knowledge in an organization can be used as noisier, higher-level supervision—or, as it is often termed, weak supervision—to quickly label large training datasets

Snorkel Drybell, experimental internal system, which adapts the opensource
Snorkel framework to **use diverse organizational knowledge
resources—like internal models, ontologies, legacy rules, knowledge
graphs and more—in order to generate training data** for machine learning
models at web scale.

Enables writing **labeling functions** that label training data programmatically

[paper&#93;(/doc/2019/06/_1812_00417_snorkel_drybell_a)

		</description>		<dc:date>2019-06-28T02:00:39Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/06/_1906_04341_what_does_bert_loo">		<title>[1906.04341&#93; What Does BERT Look At? An Analysis of BERT&apos;s Attention</title>		<link>http://www.semanlink.net/doc/2019/06/_1906_04341_what_does_bert_loo</link>		<dc:date>2019-06-21T21:49:32Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/06/nlp_contextualized_word_embedd">		<title>NLP: Contextualized word embeddings from BERT – Towards Data Science</title>		<link>http://www.semanlink.net/doc/2019/06/nlp_contextualized_word_embedd</link>		<dc:date>2019-06-12T08:24:42Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/06/hamiltonian_neural_networks">		<title>Hamiltonian Neural Networks</title>		<link>http://www.semanlink.net/doc/2019/06/hamiltonian_neural_networks</link>		<description>&gt; Even though neural networks enjoy widespread use, they still struggle to learn the basic laws of physics. How might we endow them with better inductive biases? In this paper, we draw inspiration from Hamiltonian mechanics to train models that learn and respect exact conservation laws in an unsupervised manner.		</description>		<dc:date>2019-06-11T11:51:14Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/06/_1906_02715_visualizing_and_me">		<title>[1906.02715&#93; Visualizing and Measuring the Geometry of BERT</title>		<link>http://www.semanlink.net/doc/2019/06/_1906_02715_visualizing_and_me</link>		<description>&gt; At a high level, linguistic features seem to be represented in separate semantic and syntactic subspaces. We find evidence of a fine-grained geometric representation of word senses. 
We also present empirical descriptions of syntactic representations in both attention matrices and individual word embeddings, as well as a mathematical argument to explain the geometry of these representations		</description>		<dc:date>2019-06-07T23:33:36Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/06/_i_made_a_bet_that_a_naive_baye">		<title>&quot;I made a bet that a Naive Bayes classifier would work as well on humor recognition as a neural net with fine-tuned Bert embeddings. I won&quot;</title>		<link>http://www.semanlink.net/doc/2019/06/_i_made_a_bet_that_a_naive_baye</link>		<description>[Jeremy Howard&apos;s answer&#93;(https://forums.fast.ai/t/nlp-challenge-project/44153)		</description>		<dc:date>2019-06-06T22:48:05Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/05/introducing_fastbert_a_simple">		<title>Introducing FastBert — A simple Deep Learning library for BERT Models</title>		<link>http://www.semanlink.net/doc/2019/05/introducing_fastbert_a_simple</link>		<dc:date>2019-05-23T08:23:28Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/05/robust_language_representation_">		<title>Robust Language Representation Learning via Multi-task Knowledge Distillation - Microsoft Research</title>		<link>http://www.semanlink.net/doc/2019/05/robust_language_representation_</link>		<description>Related to [this&#93;(/doc/?uri=https%3A%2F%2Farxiv.org%2Fabs%2F1901.11504).		
</description>		<dc:date>2019-05-19T23:16:17Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/05/_1905_05950_bert_rediscovers_t">		<title>[1905.05950&#93; BERT Rediscovers the Classical NLP Pipeline</title>		<link>http://www.semanlink.net/doc/2019/05/_1905_05950_bert_rediscovers_t</link>		<description>&gt; We find that the model represents the steps of the traditional NLP pipeline in an interpretable and localizable way, and that the regions responsible for each step appear in the expected sequence: POS tagging, parsing, NER, semantic roles, then coreference. Qualitative analysis reveals that the model can and often does adjust this pipeline dynamically, revising lower-level decisions on the basis of disambiguating information from higher-level representations.
		</description>		<dc:date>2019-05-18T17:50:08Z</dc:date>	</item>	<item rdf:about="https://arxiv.org/abs/1904.08398">		<title>[1904.08398&#93; DocBERT: BERT for Document Classification</title>		<link>https://arxiv.org/abs/1904.08398</link>		<dc:date>2019-04-18T17:26:35Z</dc:date>	</item>	<item rdf:about="https://arxiv.org/abs/1803.02893">		<title>[1803.02893&#93; An efficient framework for learning sentence representations</title>		<link>https://arxiv.org/abs/1803.02893</link>		<description>&quot;**Quick Thoughts**&quot;. Framework for learning sentence representations from unlabelled data.

&gt; we reformulate the problem of predicting the context in which a sentence appears as a classification problem.
		</description>		<dc:date>2019-03-20T17:47:59Z</dc:date>	</item>	<item rdf:about="http://www.offconvex.org/2019/03/19/CURL/">		<title>Contrastive Unsupervised Learning of Semantic Representations: A Theoretical Framework – Off the convex path (2019-03)</title>		<link>http://www.offconvex.org/2019/03/19/CURL/</link>		<description>[paper&#93;(/doc/?uri=https%3A%2F%2Farxiv.org%2Fabs%2F1902.09229).

Why do objectives similar to the one used by word2vec succeed in such diverse settings? (&quot;Contrastive Unsupervised Representation
Learning&quot; (CURL): **methods that leverage similar pairs of data points**)

&gt; In contrastive learning the objective used at test time is very different from the training objective: generalization error is not the right
way to think about this. -&gt; a framework that formalizes the notion of semantic
similarity that is implicitly used by these algorithms

&gt; **if the unsupervised loss happens to be small at the end of contrastive learning then the resulting
representations perform well on downstream classification**

		</description>		<dc:date>2019-03-20T16:15:33Z</dc:date>	</item>	<item rdf:about="https://github.com/huggingface/pytorch-pretrained-BERT">		<title>huggingface/pytorch-pretrained-BERT: The Big-&amp;-Extending-Repository-of-Transformers: Pretrained PyTorch models for Google&apos;s BERT, OpenAI GPT &amp; GPT-2, Google/CMU Transformer-XL.</title>		<link>https://github.com/huggingface/pytorch-pretrained-BERT</link>		<dc:date>2019-03-15T22:38:21Z</dc:date>	</item>	<item rdf:about="https://twitter.com/fchollet/status/1105139360226140160">		<title>François Chollet sur Twitter : a crash course on everything you need to know to use TensorFlow 2.0 + Keras</title>		<link>https://twitter.com/fchollet/status/1105139360226140160</link>		<dc:date>2019-03-12T22:48:43Z</dc:date>	</item>	<item rdf:about="https://arxiv.org/abs/1901.11504">		<title>[1901.11504&#93; Multi-Task Deep Neural Networks for Natural Language Understanding</title>		<link>https://arxiv.org/abs/1901.11504</link>		<description>outperforms BERT in nine of eleven benchmark NLP tasks		</description>		<dc:date>2019-02-17T12:30:18Z</dc:date>	</item>	<item rdf:about="https://blog.insightdatascience.com/using-bert-for-state-of-the-art-pre-training-for-natural-language-processing-1d87142c29e7">		<title>Using BERT for state-of-the-art pre-training for natural language processing</title>		<link>https://blog.insightdatascience.com/using-bert-for-state-of-the-art-pre-training-for-natural-language-processing-1d87142c29e7</link>		<dc:date>2019-02-14T16:45:56Z</dc:date>	</item>	<item rdf:about="https://nlp.stanford.edu/seminar/details/jdevlin.pdf">		<title>Jacob Devlin talks about BERT at the Stanford NLP seminar</title>		<link>https://nlp.stanford.edu/seminar/details/jdevlin.pdf</link>		<description>Includes new results such as the effect of the masking strategy, using synthetic training data,...		
</description>		<dc:date>2019-02-11T11:20:39Z</dc:date>	</item>	<item rdf:about="https://www.zdnet.com/article/google-explores-ais-mysterious-polytope/">		<title>Google explores AI&apos;s mysterious polytope | ZDNet</title>		<link>https://www.zdnet.com/article/google-explores-ais-mysterious-polytope/</link>		<dc:date>2019-02-09T01:52:31Z</dc:date>	</item>	<item rdf:about="http://www.semanlink.net/doc/2019/02/keywords2vec">		<title>Keywords2vec</title>		<link>http://www.semanlink.net/doc/2019/02/keywords2vec</link>		<description>To generate a word2vec model, but using keywords instead of one word. Tokenize on stopwords + non word characters

(This remembers me author of [FlashText algorithm&#93;(tag:flashtext_algorithm.html) saying he had developed it to create word2vec models)		</description>		<dc:date>2019-02-09T01:43:55Z</dc:date>	</item>	<item rdf:about="https://www.lemonde.fr/pixels/article/2019/02/06/intelligence-artificielle-deepmind-s-interesse-au-jeu-de-cartes-francais-hanabi_5420186_4408996.html">		<title>Intelligence artificielle : DeepMind s’intéresse au jeu de cartes français Hanabi</title>		<link>https://www.lemonde.fr/pixels/article/2019/02/06/intelligence-artificielle-deepmind-s-interesse-au-jeu-de-cartes-francais-hanabi_5420186_4408996.html</link>		<dc:date>2019-02-07T01:39:52Z</dc:date>	</item>	<item rdf:about="https://nlpparis.files.wordpress.com/2019/01/hyperlex_meetup23011.pdf">		<title>Romain Vial (Hyperlex) at Paris NLP meetup, slides</title>		<link>https://nlpparis.files.wordpress.com/2019/01/hyperlex_meetup23011.pdf</link>		<description>&gt; Hyperlex is a contract analytics and management solution powered by artificial intelligence. Hyperlex helps companies manage and make the most of their contract portfolio by identifying relevant information and data to manage key contractual commitments.

&gt; Take-home message:
&gt;
&gt; - Sentence representation starts to be well understood empirically
&gt; - Large document representation is still an open (and interesting) problem!
 		</description>		<dc:date>2019-01-24T17:21:48Z</dc:date>	</item>	<item rdf:about="https://medium.com/data-from-the-trenches/training-cutting-edge-neural-networks-with-tensor2tensor-and-10-lines-of-code-10973c030b8">		<title>Training Cutting-Edge Neural Networks with Tensor2Tensor and 10 lines of code</title>		<link>https://medium.com/data-from-the-trenches/training-cutting-edge-neural-networks-with-tensor2tensor-and-10-lines-of-code-10973c030b8</link>		<dc:date>2019-01-21T10:58:18Z</dc:date>	</item>	<item rdf:about="https://twitter.com/dpkingma/status/1070856305831624704">		<title>Durk Kingma sur Twitter : about likelihood-based generative models</title>		<link>https://twitter.com/dpkingma/status/1070856305831624704</link>		<description>Durk Kingma sur Twitter

&gt; &quot;It is my personal belief is that sufficiently powerful likelihood-based generative models will usher in a new era of machine learning, allowing us to tackle important limitations of current machine learning, such as lacking data efficiency and generalization. [7/8&#93;&quot;		</description>		<dc:date>2018-12-07T08:38:44Z</dc:date>	</item>	<item rdf:about="https://openreview.net/forum?id=S1HlA-ZAZ">		<title>The Kanerva Machine: A Generative Distributed Memory | OpenReview (2018)</title>		<link>https://openreview.net/forum?id=S1HlA-ZAZ</link>		<description>A generative memory model that combines slow-learning neural networks and a fast-adapting linear Gaussian model as memory		</description>		<dc:date>2018-12-06T12:50:01Z</dc:date>	</item>	<item rdf:about="https://jalammar.github.io/illustrated-bert/">		<title>The Illustrated BERT, ELMo, and co. (How NLP Cracked Transfer Learning) – Jay Alammar</title>		<link>https://jalammar.github.io/illustrated-bert/</link>		<dc:date>2018-12-03T15:08:17Z</dc:date>	</item>	<item rdf:about="https://ai.googleblog.com/2018/10/google-at-emnlp-2018.html">		<title>Google AI Blog: Google at EMNLP 2018</title>		<link>https://ai.googleblog.com/2018/10/google-at-emnlp-2018.html</link>		<dc:date>2018-11-25T15:14:25Z</dc:date>	</item>	<item rdf:about="https://ai.googleblog.com/2018/11/open-sourcing-bert-state-of-art-pre.html">		<title>Google AI Blog: Open Sourcing BERT: State-of-the-Art Pre-training for Natural Language Processing</title>		<link>https://ai.googleblog.com/2018/11/open-sourcing-bert-state-of-art-pre.html</link>		<dc:date>2018-11-05T15:13:01Z</dc:date>	</item>	<item rdf:about="https://github.com/google-research/bert">		<title>GitHub - google-research/bert: TensorFlow code and pre-trained models for BERT</title>		<link>https://github.com/google-research/bert</link>		<description>Code and pretrained weights for BERT.
Includes scripts to reproduce results. BERT-Base can be fine-tuned on a standard GPU; for BERT-Large, a Cloud TPU is required		</description>		<dc:date>2018-11-05T15:04:06Z</dc:date>	</item>	<item rdf:about="https://aclanthology.coli.uni-saarland.de/papers/D18-1092/d18-1092">		<title>Self-Governing Neural Networks for On-Device Short Text Classification - Sujith Ravi | Zornitsa Kozareva (2018)</title>		<link>https://aclanthology.coli.uni-saarland.de/papers/D18-1092/d18-1092</link>		<description>[same paper&#93;(https://aclweb.org/anthology/papers/D/D18/D18-1092/)		</description>		<dc:date>2018-11-02T23:20:31Z</dc:date>	</item>	<item rdf:about="https://sermanet.github.io/imitate/">		<title>Time-Contrastive Networks: Self-Supervised Learning from Video (2017)</title>		<link>https://sermanet.github.io/imitate/</link>		<description>Self-supervised approach for learning representations and robotic behaviors entirely from unlabeled videos recorded from multiple viewpoints, and study how this representation can be used in two robotic imitation settings: imitating object interactions from videos of humans, and imitating human poses. 

&gt; We train our representations using a metric learning loss, where multiple simultaneous viewpoints of the same observation are attracted in the embedding space, while being repelled from temporal neighbors which are often visually similar but functionally different. In other words, the model simultaneously learns to recognize what is common between different-looking images, and what is different between similar-looking images.
&gt; This signal causes our model to discover attributes that do not change across viewpoint, but do change across time, while ignoring nuisance variables such as occlusions, motion blur, lighting and background. We demonstrate that this representation can be used by a robot to directly mimic human poses without an explicit correspondence, and that it can be used as a reward function within a reinforcement learning algorithm.		</description>		<dc:date>2018-10-27T14:59:43Z</dc:date>	</item>	<item rdf:about="https://twitter.com/TensorFlow/status/1055538593941409792">		<title>TensorFlow: how to load and save models at every epoch so you never lose time or data.</title>		<link>https://twitter.com/TensorFlow/status/1055538593941409792</link>		<dc:date>2018-10-26T16:31:02Z</dc:date>	</item>	<item rdf:about="https://arxiv.org/abs/1703.03129">		<title>[1703.03129&#93; Learning to Remember Rare Events</title>		<link>https://arxiv.org/abs/1703.03129</link>		<description>&gt; a large-scale life-long memory module for use in deep learning. The module exploits fast nearest-neighbor algorithms for efficiency and thus scales to large memory sizes. Except for the nearest-neighbor query, the module is fully differentiable and trained end-to-end with no extra supervision. It operates in a life-long manner, i.e., without the need to reset it during training. 
&gt; Our memory module can be easily added to any part of a supervised neural network		</description>		<dc:date>2018-10-23T12:36:58Z</dc:date>	</item>	<item rdf:about="https://arxiv.org/abs/1810.04805">		<title>[1810.04805&#93; BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding</title>		<link>https://arxiv.org/abs/1810.04805</link>		<description>**The &quot;Devlin et al 2019&quot; paper**

[Paper Dissected&#93;(https://datasciencetoday.net/index.php/en-us/nlp/211-paper-dissected-bert-pre-training-of-deep-bidirectional-transformers-for-language-understanding-explained)		</description>		<dc:date>2018-10-12T14:36:01Z</dc:date>	</item>	<item rdf:about="https://js.tensorflow.org/">		<title>TensorFlow.js</title>		<link>https://js.tensorflow.org/</link>		<dc:date>2018-10-10T11:27:13Z</dc:date>	</item>	<item rdf:about="https://colab.research.google.com/github/tensorflow/tpu/blob/master/tools/colab/fashion_mnist.ipynb">		<title>Training on TPU</title>		<link>https://colab.research.google.com/github/tensorflow/tpu/blob/master/tools/colab/fashion_mnist.ipynb</link>		<dc:date>2018-10-05T08:19:26Z</dc:date>	</item>	<item rdf:about="https://www.quora.com/For-what-tasks-is-Pytorch-preferable-to-Tensorflow">		<title>For what tasks is Pytorch preferable to Tensorflow? - Quora</title>		<link>https://www.quora.com/For-what-tasks-is-Pytorch-preferable-to-Tensorflow</link>		<dc:date>2018-08-28T09:23:39Z</dc:date>	</item>	<item rdf:about="https://medium.com/@faizanmukardam/simple-guide-to-neural-arithmetic-logic-units-nalu-explanation-intuition-and-code-64bc22605712">		<title>Simple guide to Neural Arithmetic Logic Units (NALU): Explanation, Intuition and Code</title>		<link>https://medium.com/@faizanmukardam/simple-guide-to-neural-arithmetic-logic-units-nalu-explanation-intuition-and-code-64bc22605712</link>		<description>a neural network model that can learn simple to complex numerical functions with great extrapolation (generalisation) ability
		</description>		<dc:date>2018-08-21T17:25:23Z</dc:date>	</item>	<item rdf:about="https://arxiv.org/abs/1807.03748">		<title>[1807.03748&#93; Representation Learning with Contrastive Predictive Coding</title>		<link>https://arxiv.org/abs/1807.03748</link>		<description>&gt; a universal unsupervised learning approach to extract useful representations from high-dimensional data, which we call Contrastive Predictive Coding. The key insight of our model is to learn such representations by predicting the future in latent space by using powerful [autoregressive models&#93;(/tag/autoregressive_model). We use a probabilistic contrastive loss which induces the latent space to capture information that is maximally useful to predict future samples. It also makes the model tractable by using [negative sampling&#93;(/tag/negative_sampling).

a contrastive method that can be applied to any form of data that can be expressed in an ordered sequence: text, speech, video...		</description>		<dc:date>2018-07-21T10:05:02Z</dc:date>	</item>	<item rdf:about="https://www.tensorflow.org/extras/candidate_sampling.pdf">		<title>What is Candidate Sampling</title>		<link>https://www.tensorflow.org/extras/candidate_sampling.pdf</link>		<dc:date>2018-07-07T15:04:54Z</dc:date>	</item>	<item rdf:about="https://stats.stackexchange.com/questions/244616/how-sampling-works-in-word2vec-can-someone-please-make-me-understand-nce-and-ne/245452#245452">		<title>How sampling works in Word2vec? Can someone please make me understand NCE and negative sampling? - Cross Validated</title>		<link>https://stats.stackexchange.com/questions/244616/how-sampling-works-in-word2vec-can-someone-please-make-me-understand-nce-and-ne/245452#245452</link>		<description>&gt; In order to deal with the issue of the expensive computation of the softmax, Word2Vec uses a technique called noise-contrastive estimation... **The basic idea is to convert a multinomial classification problem (as it is the problem of predicting the next word) to a binary classification problem.**
		</description>		<dc:date>2018-07-07T15:02:59Z</dc:date>	</item>	<item rdf:about="https://arxiv.org/abs/1806.01261">		<title>[1806.01261&#93; Relational inductive biases, deep learning, and graph networks</title>		<link>https://arxiv.org/abs/1806.01261</link>		<description>&gt; generalizing beyond one&apos;s experiences--a hallmark of human intelligence from infancy--remains a formidable challenge for modern AI

&gt; A key signature of human intelligence is the ability to make &quot;infinite use of finite means&quot; (Humboldt,
1836; Chomsky, 1965) (ex: words / sentences)

&gt; Here we explore how to improve modern AI&apos;s capacity for **combinatorial generalization** by
biasing learning towards structured representations and computations, and in particular, systems
that operate on graphs.

(papier recommandé par [Peter Bloem&#93;(tag:peter_bloem))		</description>		<dc:date>2018-06-13T13:34:03Z</dc:date>	</item>	<item rdf:about="https://cloud.google.com/tpu/">		<title>Cloud TPU – Accélérateurs de ML pour TensorFlow  |  Google Cloud</title>		<link>https://cloud.google.com/tpu/</link>		<dc:date>2018-05-31T16:23:57Z</dc:date>	</item>	<item rdf:about="https://arxiv.org/abs/1803.11175">		<title>[1803.11175&#93; Universal Sentence Encoder</title>		<link>https://arxiv.org/abs/1803.11175</link>		<description>models for encoding sentences into embedding vectors that specifically target transfer learning to other NLP tasks.

&gt; With transfer learning via sentence embeddings, we observe surprisingly good performance with minimal amounts of supervised training data for a transfer task

mixes an unsupervised task using a large corpus together with the supervised SNLI task, leveraging the [#Transformer&#93;(/tag/attention_is_all_you_need) architecture		</description>		<dc:date>2018-05-29T16:50:18Z</dc:date>	</item>	<item rdf:about="https://www.tensorflow.org/hub/modules/google/universal-sentence-encoder-large/1">		<title>Module google/universal-sentence-encoder  |  TensorFlow</title>		<link>https://www.tensorflow.org/hub/modules/google/universal-sentence-encoder-large/1</link>		<description>[Paper presented at EMNLP 2018&#93;(https://aclanthology.coli.uni-saarland.de/papers/D18-2029/d18-2029)
		</description>		<dc:date>2018-05-23T16:35:31Z</dc:date>	</item>	<item rdf:about="https://guillaumegenthial.github.io/testing.html">		<title>Testing Tensorflow code</title>		<link>https://guillaumegenthial.github.io/testing.html</link>		<dc:date>2018-05-21T12:04:22Z</dc:date>	</item>	<item rdf:about="https://towardsdatascience.com/how-to-use-dataset-in-tensorflow-c758ef9e4428">		<title>How to use Dataset in TensorFlow – Towards Data Science</title>		<link>https://towardsdatascience.com/how-to-use-dataset-in-tensorflow-c758ef9e4428</link>		<dc:date>2018-04-21T11:41:38Z</dc:date>	</item>	<item rdf:about="http://ruder.io/text-classification-tensorflow-estimators/">		<title>Text Classification with TensorFlow Estimators</title>		<link>http://ruder.io/text-classification-tensorflow-estimators/</link>		<dc:date>2018-04-17T14:19:22Z</dc:date>	</item>	<item rdf:about="https://deepmind.com/blog/learning-to-generate-images/">		<title>Learning to write programs that generate images | DeepMind</title>		<link>https://deepmind.com/blog/learning-to-generate-images/</link>		<description>This ability to interpret objects through the tools that created them gives us a richer understanding of the world and is an important aspect of our intelligence.		</description>		<dc:date>2018-03-28T12:11:42Z</dc:date>	</item>	<item rdf:about="https://arxiv.org/abs/1803.05651">		<title>[1803.05651&#93; Word2Bits - Quantized Word Vectors</title>		<link>https://arxiv.org/abs/1803.05651</link>		<description>We show that high quality quantized word vectors using 1-2 bits per parameter can be learned by introducing a quantization function into Word2Vec. 
We furthermore show that training with the quantization function acts as a regularizer		</description>		<dc:date>2018-03-20T17:36:21Z</dc:date>	</item>	<item rdf:about="https://github.com/anvaka/word2vec-graph">		<title>GitHub - anvaka/word2vec-graph: Exploring word2vec embeddings as a graph of nearest neighbors</title>		<link>https://github.com/anvaka/word2vec-graph</link>		<dc:date>2018-03-12T11:22:58Z</dc:date>	</item>	<item rdf:about="http://www.codesofinterest.com/2017/08/bottleneck-features-multi-class-classification-keras.html">		<title>Codes of Interest: Using Bottleneck Features for Multi-Class Classification in Keras and TensorFlow</title>		<link>http://www.codesofinterest.com/2017/08/bottleneck-features-multi-class-classification-keras.html</link>		<dc:date>2018-03-04T16:49:06Z</dc:date>	</item>	<item rdf:about="https://github.com/tensorflow/models">		<title>GitHub - tensorflow/models: Models and examples built with TensorFlow</title>		<link>https://github.com/tensorflow/models</link>		<dc:date>2018-02-28T23:55:28Z</dc:date>	</item>	<item rdf:about="https://medium.com/scaleabout/a-gentle-introduction-to-doc2vec-db3e8c0cce5e">		<title>A gentle introduction to Doc2Vec – ScaleAbout – Medium</title>		<link>https://medium.com/scaleabout/a-gentle-introduction-to-doc2vec-db3e8c0cce5e</link>		<dc:date>2018-02-14T01:34:05Z</dc:date>	</item>	<item rdf:about="https://www.quora.com/How-does-doc2vec-represent-feature-vector-of-a-document-Can-anyone-explain-mathematically-how-the-process-is-done/answer/Piyush-Bhardwaj-7">		<title>Explanation for Doc2Vec - Quora</title>		<link>https://www.quora.com/How-does-doc2vec-represent-feature-vector-of-a-document-Can-anyone-explain-mathematically-how-the-process-is-done/answer/Piyush-Bhardwaj-7</link>		<dc:date>2018-02-14T01:19:08Z</dc:date>	</item>	<item rdf:about="https://arxiv.org/abs/1710.04099">		<title>[1710.04099&#93; Wembedder: Wikidata entity embedding web service</title>		
<link>https://arxiv.org/abs/1710.04099</link>		<description>web service for querying an embedding of entities in the Wikidata knowledge graph. The embedding is trained on the Wikidata dump using Gensim&apos;s Word2Vec implementation and a simple graph walk		</description>		<dc:date>2018-02-13T19:14:37Z</dc:date>	</item>	<item rdf:about="https://arxiv.org/abs/1712.09405">		<title>[1712.09405&#93; Advances in Pre-Training Distributed Word Representations</title>		<link>https://arxiv.org/abs/1712.09405</link>		<description>&gt; we show how to train high-quality word vector representations by using a combination of known tricks that are however rarely used together. The main result of our work is the new set of publicly available pre-trained models that outperform the current state of the art by a large margin on a number of tasks		</description>		<dc:date>2017-12-29T20:52:48Z</dc:date>	</item>	<item rdf:about="https://github.com/RaRe-Technologies/gensim/blob/c971411c09773488dbdd899754537c0d1a9fce50/docs/notebooks/WMD_tutorial.ipynb">		<title>gensim/WMD_tutorial.ipynb</title>		<link>https://github.com/RaRe-Technologies/gensim/blob/c971411c09773488dbdd899754537c0d1a9fce50/docs/notebooks/WMD_tutorial.ipynb</link>		<description>Finding similar documents with Word2Vec and WMD (Word Mover’s Distance)		</description>		<dc:date>2017-12-23T14:12:41Z</dc:date>	</item>	<item rdf:about="http://learningsys.org/nips17/assets/slides/dean-nips17.pdf">		<title>Machine Learning for Systems and Systems for Machine Learning (NIPS 2017)</title>		<link>http://learningsys.org/nips17/assets/slides/dean-nips17.pdf</link>		<dc:date>2017-12-12T10:57:13Z</dc:date>	</item>	<item rdf:about="https://arxiv.org/abs/1712.01208v1">		<title>[1712.01208&#93; The Case for Learned Index Structures</title>		<link>https://arxiv.org/abs/1712.01208v1</link>		<description>&gt; we believe that the idea of replacing core components of a data management system through learned models 
has far reaching implications for future systems designs 
&gt;
&gt; Indexes are models: a B-Tree-Index can be seen as a model to map a key to the position of a record within a sorted array, a Hash-Index as a model to map a key to a position of a record within an unsorted array, and a BitMap-Index as a model to indicate if a data record exists or not. In this exploratory research paper, we start from this premise and posit that all existing index structures can be replaced with other types of models, including deep-learning models, which we term learned indexes.		</description>		<dc:date>2017-12-11T19:25:09Z</dc:date>	</item>	<item rdf:about="http://www.wildml.com/2015/12/implementing-a-cnn-for-text-classification-in-tensorflow/">		<title>Implementing a CNN for Text Classification in TensorFlow – WildML</title>		<link>http://www.wildml.com/2015/12/implementing-a-cnn-for-text-classification-in-tensorflow/</link>		<dc:date>2017-11-06T18:56:50Z</dc:date>	</item>	<item rdf:about="https://www.kaggle.com/cpmpml/spell-checker-using-word2vec?scriptVersionId=1152488">		<title>Spell Checker using Word2vec | Kaggle</title>		<link>https://www.kaggle.com/cpmpml/spell-checker-using-word2vec?scriptVersionId=1152488</link>		<dc:date>2017-11-03T10:46:08Z</dc:date>	</item>	<item rdf:about="https://www.kaggle.com/lystdo/lstm-with-word2vec-embeddings">		<title>LSTM with word2vec embeddings | Kaggle</title>		<link>https://www.kaggle.com/lystdo/lstm-with-word2vec-embeddings</link>		<dc:date>2017-10-25T15:50:14Z</dc:date>	</item>	<item rdf:about="http://ben.bolte.cc/blog/2016/gensim.html">		<title>Using Gensim Word2Vec Embeddings in Keras | Ben Bolte&apos;s Blog</title>		<link>http://ben.bolte.cc/blog/2016/gensim.html</link>		<dc:date>2017-10-23T09:05:11Z</dc:date>	</item>	<item rdf:about="http://adventuresinmachinelearning.com/recurrent-neural-networks-lstm-tutorial-tensorflow/">		<title>Recurrent neural networks and LSTM tutorial in Python and TensorFlow - Adventures in Machine Learning</title>		
<link>http://adventuresinmachinelearning.com/recurrent-neural-networks-lstm-tutorial-tensorflow/</link>		<dc:date>2017-10-23T08:53:16Z</dc:date>	</item>	<item rdf:about="http://adventuresinmachinelearning.com/word2vec-keras-tutorial/">		<title>A Word2Vec Keras tutorial</title>		<link>http://adventuresinmachinelearning.com/word2vec-keras-tutorial/</link>		<dc:date>2017-10-23T01:22:35Z</dc:date>	</item>	<item rdf:about="https://www.tensorflow.org/install/install_mac">		<title>Installing TensorFlow on Mac OS X  |  TensorFlow</title>		<link>https://www.tensorflow.org/install/install_mac</link>		<dc:date>2017-10-23T00:19:06Z</dc:date>	</item>	<item rdf:about="https://deepmind.com/blog/alphago-zero-learning-scratch/">		<title>AlphaGo Zero: Learning from scratch | DeepMind</title>		<link>https://deepmind.com/blog/alphago-zero-learning-scratch/</link>		<dc:date>2017-10-18T22:43:19Z</dc:date>	</item>	<item rdf:about="http://www.lemonde.fr/pixels/article/2017/10/18/intelligence-artificielle-toujours-plus-puissant-alphago-apprend-desormais-sans-donnees-humaines_5202931_4408996.html">		<title>Intelligence artificielle : toujours plus puissant, AlphaGo apprend désormais sans données humaines</title>		<link>http://www.lemonde.fr/pixels/article/2017/10/18/intelligence-artificielle-toujours-plus-puissant-alphago-apprend-desormais-sans-donnees-humaines_5202931_4408996.html</link>		<dc:date>2017-10-18T22:38:12Z</dc:date>	</item>	<item rdf:about="http://nicodjimenez.github.io/2017/10/08/tensorflow.html">		<title>Tensorflow sucks</title>		<link>http://nicodjimenez.github.io/2017/10/08/tensorflow.html</link>		<description>see [What do people think of the TensorFlow sucks article? 
on Quora&#93;(https://www.quora.com/What-do-people-think-of-the-TensorFlow-sucks-article)		</description>		<dc:date>2017-10-16T14:34:28Z</dc:date>	</item>	<item rdf:about="https://web.stanford.edu/class/cs276/handouts/lecture20-distributed-representations.pdf">		<title>Distributed Word Representations for Information Retrieval</title>		<link>https://web.stanford.edu/class/cs276/handouts/lecture20-distributed-representations.pdf</link>		<description>includes description of word2vec
		</description>		<dc:date>2017-10-01T19:10:39Z</dc:date>	</item>	<item rdf:about="https://github.com/tensorflow/nmt">		<title>TensorFlow Neural Machine Translation (seq2seq) Tutorial</title>		<link>https://github.com/tensorflow/nmt</link>		<dc:date>2017-09-18T14:14:51Z</dc:date>	</item>	<item rdf:about="http://mccormickml.com/2016/04/27/word2vec-resources/">		<title>Word2Vec Resources · Chris McCormick</title>		<link>http://mccormickml.com/2016/04/27/word2vec-resources/</link>		<dc:date>2017-09-12T12:21:25Z</dc:date>	</item>	<item rdf:about="http://mccormickml.com/2017/01/11/word2vec-tutorial-part-2-negative-sampling/">		<title>Word2Vec Tutorial Part 2 - Negative Sampling · Chris McCormick</title>		<link>http://mccormickml.com/2017/01/11/word2vec-tutorial-part-2-negative-sampling/</link>		<description>the tweaks to make training feasible		</description>		<dc:date>2017-09-10T17:23:52Z</dc:date>	</item>	<item rdf:about="http://mccormickml.com/2016/04/19/word2vec-tutorial-the-skip-gram-model/">		<title>Word2Vec Tutorial - The Skip-Gram Model · Chris McCormick</title>		<link>http://mccormickml.com/2016/04/19/word2vec-tutorial-the-skip-gram-model/</link>		<description>skip-gram		</description>		<dc:date>2017-09-10T17:16:26Z</dc:date>	</item>	<item rdf:about="https://www.quora.com/How-does-word2vec-work-Can-someone-walk-through-a-specific-example">		<title>How does word2vec work? Can someone walk through a specific example? 
- Quora</title>		<link>https://www.quora.com/How-does-word2vec-work-Can-someone-walk-through-a-specific-example</link>		<dc:date>2017-08-28T16:26:41Z</dc:date>	</item>	<item rdf:about="https://www.tensorflow.org/tutorials/word2vec">		<title>Vector Representations of Words  |  TensorFlow</title>		<link>https://www.tensorflow.org/tutorials/word2vec</link>		<dc:date>2017-08-28T15:41:07Z</dc:date>	</item>	<item rdf:about="https://arxiv.org/pdf/1507.07998.pdf">		<title>[1507.07998&#93; Document Embedding with Paragraph Vectors</title>		<link>https://arxiv.org/pdf/1507.07998.pdf</link>		<dc:date>2017-08-20T23:29:27Z</dc:date>	</item>	<item rdf:about="http://blog.aylien.com/overview-word-embeddings-history-word2vec-cbow-glove/">		<title>An overview of word embeddings and their connection to distributional semantic models - AYLIEN (2016)</title>		<link>http://blog.aylien.com/overview-word-embeddings-history-word2vec-cbow-glove/</link>		<description>&gt; While on the surface DSMs and word embedding models use varying algorithms to learn word representations – the former count, the latter predict – both types of model fundamentally act on the same underlying statistics of the data, i.e. the co-occurrence counts between words...

&gt; These results are in contrast to the general consensus that word embeddings are superior to traditional methods. Rather, they indicate that it typically makes no difference whatsoever whether word embeddings or distributional methods are used. What really matters is that your hyperparameters are tuned and that you utilize the appropriate pre-processing and post-processing steps.		</description>		<dc:date>2017-07-20T15:43:09Z</dc:date>	</item>	<item rdf:about="https://www.kaggle.com/c/word2vec-nlp-tutorial/details/part-3-more-fun-with-word-vectors">		<title>More Fun With Word Vectors - Bag of Words Meets Bags of Popcorn | Kaggle</title>		<link>https://www.kaggle.com/c/word2vec-nlp-tutorial/details/part-3-more-fun-with-word-vectors</link>		<description>&gt; We found that the code above gives about the same (or slightly worse) results compared to the Bag of Words		</description>		<dc:date>2017-07-20T14:56:22Z</dc:date>	</item>	<item rdf:about="https://www.quora.com/Can-I-use-word2vec-representation-to-train-a-weka-classifier">		<title>Can I use word2vec representation to train a weka classifier? - Quora</title>		<link>https://www.quora.com/Can-I-use-word2vec-representation-to-train-a-weka-classifier</link>		<dc:date>2017-07-20T13:45:20Z</dc:date>	</item>	<item rdf:about="https://www.quora.com/Can-I-use-word2vec-to-train-a-machine-learning-classifier">		<title>Can I use word2vec to train a machine learning classifier? 
- Quora</title>		<link>https://www.quora.com/Can-I-use-word2vec-to-train-a-machine-learning-classifier</link>		<dc:date>2017-07-20T13:42:49Z</dc:date>	</item>	<item rdf:about="http://fauconnier.github.io/">		<title>Some pre-trained word2vec models for French</title>		<link>http://fauconnier.github.io/</link>		<dc:date>2017-07-20T13:00:27Z</dc:date>	</item>	<item rdf:about="https://arxiv.org/abs/1405.4053">		<title>[1405.4053&#93; Distributed Representations of Sentences and Documents</title>		<link>https://arxiv.org/abs/1405.4053</link>		<description>Paragraph Vector: an unsupervised algorithm that learns fixed-length feature representations from variable-length pieces of texts, such as sentences, paragraphs, and documents.Represents each document by a dense vector which is trained to predict words in the document. Overcomes the weaknesses of the [Bag Of Words&#93;(/tag/bag_of_words) model (order of words, semantic of words)


		</description>		<dc:date>2017-07-10T16:20:03Z</dc:date>	</item>	<item rdf:about="https://www.analyticsvidhya.com/blog/2017/06/word-embeddings-count-word2veec/">		<title>An Intuitive Understanding of Word Embeddings: From Count Vectors to Word2Vec</title>		<link>https://www.analyticsvidhya.com/blog/2017/06/word-embeddings-count-word2veec/</link>		<description>Types of word embeddings:

- Frequency based Embedding
    - Count Vector
    - TF-IDF Vector
    - Co-Occurrence Vector 
        - Co-occurrence matrix (with a fixed context window): a V*V or V * N matrix (vocab size * size of a subset of V). 
        - PCA or SVD: keeping the k most important eigenvalues
- Prediction based Embedding
    - CBOW (Continuous Bag Of Words). 1 hidden layer, one output layer. Predict the probability of a word given a context
    - Skip-gram. Predict the probability of the context given a word

Sample code using gensim		</description>		<dc:date>2017-06-09T17:48:39Z</dc:date>	</item>	<item rdf:about="https://github.com/3Top/word2vec-api">		<title>word2vec-api</title>		<link>https://github.com/3Top/word2vec-api</link>		<description>Simple web service providing a word embedding API. The methods are based on Gensim Word2Vec implementation.&lt;br/&gt;
List of word2vec datasets
		</description>		<dc:date>2017-06-09T17:24:25Z</dc:date>	</item>	<item rdf:about="https://radimrehurek.com/gensim/models/word2vec.html">		<title>gensim: models.word2vec – Deep learning with word2vec</title>		<link>https://radimrehurek.com/gensim/models/word2vec.html</link>		<dc:date>2017-06-01T13:05:30Z</dc:date>	</item>	<item rdf:about="https://rare-technologies.com/word2vec-tutorial/">		<title>Word2vec in gensim Tutorial | RaRe Technologies</title>		<link>https://rare-technologies.com/word2vec-tutorial/</link>		<dc:date>2017-06-01T02:22:33Z</dc:date>	</item>	<item rdf:about="https://www.quora.com/Are-there-any-more-modern-alternatives-to-word2vec">		<title>alternatives to word2vec? - Quora</title>		<link>https://www.quora.com/Are-there-any-more-modern-alternatives-to-word2vec</link>		<dc:date>2017-05-23T15:06:24Z</dc:date>	</item>	<item rdf:about="https://transacl.org/ojs/index.php/tacl/article/view/582/158">		<title>Improving Topic Models with Latent Feature Word Representations | Nguyen | Transactions of the Association for Computational Linguistics</title>		<link>https://transacl.org/ojs/index.php/tacl/article/view/582/158</link>		<dc:date>2017-05-20T14:05:12Z</dc:date>	</item>	<item rdf:about="http://stackoverflow.com/questions/32979254/using-word2vec-for-topic-modeling">		<title>Using Word2Vec for topic modeling - Stack Overflow</title>		<link>http://stackoverflow.com/questions/32979254/using-word2vec-for-topic-modeling</link>		<dc:date>2017-05-19T00:22:06Z</dc:date>	</item>	<item rdf:about="http://nadbordrozd.github.io/blog/2016/05/20/text-classification-with-word2vec/">		<title>Text Classification With Word2Vec - DS lore (2016)</title>		<link>http://nadbordrozd.github.io/blog/2016/05/20/text-classification-with-word2vec/</link>		<description>&gt; Overall, we won’t be throwing away our SVMs any time soon in favor of word2vec but it has it’s place in text classification.
&gt;
&gt; 1. SVM’s are pretty great at text classification tasks
&gt; 2. Models based on simple averaging of word-vectors can be surprisingly good too (given how much information is lost in taking the average)
&gt; 3. but they only seem to have a clear advantage when there is ridiculously little labeled training data
&gt;
&gt; Update 2017: actually, the best way to utilise the pretrained embeddings would probably be this [using keras&#93;(https://blog.keras.io/using-pre-trained-word-embeddings-in-a-keras-model.html)

Sample code to benchmark a few text categorization models to test whether word embeddings like word2vec can improve text classification accuracy.
Sample code (based on scikit-learn) includes an embedding vectorizer that is given an embedding dataset and vectorizes texts by taking the mean of all the vectors corresponding to individual words.

		</description>		<dc:date>2017-05-18T23:42:46Z</dc:date>	</item>	<item rdf:about="http://clic.cimec.unitn.it/marco/publications/acl2014/baroni-etal-countpredict-acl2014.pdf">		<title>Don’t count, predict! A systematic comparison of context-counting vs. context-predicting semantic vectors (2014)</title>		<link>http://clic.cimec.unitn.it/marco/publications/acl2014/baroni-etal-countpredict-acl2014.pdf</link>		<description>(good presentation in the intro of context-counting vs. context-predicting vectors)		</description>		<dc:date>2017-05-18T23:30:46Z</dc:date>	</item>	<item rdf:about="https://www.quora.com/How-is-GloVe-different-from-word2vec">		<title>How is GloVe different from word2vec? - Quora</title>		<link>https://www.quora.com/How-is-GloVe-different-from-word2vec</link>		<description>Both learn geometrical encodings (vectors) of words from their co-occurrence information. Word2vec is a &quot;predictive&quot; model, whereas GloVe is a &quot;count-based&quot; model.		</description>		<dc:date>2017-05-18T23:20:04Z</dc:date>	</item>	<item rdf:about="https://www.newscientist.com/article/2110522-googles-neural-networks-invent-their-own-encryption/">		<title>Google&apos;s neural networks invent their own encryption | New Scientist</title>		<link>https://www.newscientist.com/article/2110522-googles-neural-networks-invent-their-own-encryption/</link>		<dc:date>2016-11-06T01:56:28Z</dc:date>	</item>	<item rdf:about="http://fgiasson.com/blog/index.php/2016/09/28/using-cognonto-to-generate-domain-specific-word2vec-models/">		<title>Using Cognonto to Generate Domain Specific word2vec Models | Frederick Giasson</title>		<link>http://fgiasson.com/blog/index.php/2016/09/28/using-cognonto-to-generate-domain-specific-word2vec-models/</link>		<description>creating domain-specific training corpuses to use with word2vec can have a dramatic impact on the results and how results can be much more meaningful within the scope of that domain. 
Another advantage of the domain-specific training corpuses is that they create much smaller models.		</description>		<dc:date>2016-09-29T08:43:15Z</dc:date>	</item>	<item rdf:about="https://cloud.google.com/blog/big-data/2016/07/understanding-neural-networks-with-tensorflow-playground">		<title>Understanding neural networks with TensorFlow Playground | Google Cloud Big Data and Machine Learning Blog  |  Google Cloud Platform</title>		<link>https://cloud.google.com/blog/big-data/2016/07/understanding-neural-networks-with-tensorflow-playground</link>		<dc:date>2016-07-27T10:05:31Z</dc:date>	</item>	<item rdf:about="http://www.theguardian.com/world/2016/mar/13/go-humans-lee-sedol-scores-first-victory-against-supercomputer">		<title>Go humans: Lee Sedol scores first victory against supercomputer | World news | The Guardian</title>		<link>http://www.theguardian.com/world/2016/mar/13/go-humans-lee-sedol-scores-first-victory-against-supercomputer</link>		<dc:date>2016-03-13T20:28:38Z</dc:date>	</item>	<item rdf:about="http://www.wired.com/2016/03/sadness-beauty-watching-googles-ai-play-go">		<title>The Sadness and Beauty of Watching Google’s AI Play Go | WIRED</title>		<link>http://www.wired.com/2016/03/sadness-beauty-watching-googles-ai-play-go</link>		<dc:date>2016-03-11T21:01:33Z</dc:date>	</item>	<item rdf:about="http://www.lab41.org/anything2vec/">		<title>2Vec or Not 2Vec?</title>		<link>http://www.lab41.org/anything2vec/</link>		<dc:date>2016-03-05T14:37:01Z</dc:date>	</item>	<item rdf:about="http://deeplearning4j.org/word2vec.html">		<title>Word2vec: Neural Word Embeddings in Java - Deeplearning4j: Open-source, distributed deep learning for the JVM</title>		<link>http://deeplearning4j.org/word2vec.html</link>		<dc:date>2016-02-26T13:01:35Z</dc:date>	</item>	<item rdf:about="http://opiateforthemass.es/articles/mini-ai-app-using-tensorflow-and-shiny/">		<title>Mini AI app using TensorFlow and Shiny – Opiate for the masses</title>	
	<link>http://opiateforthemass.es/articles/mini-ai-app-using-tensorflow-and-shiny/</link>		<dc:date>2016-01-15T01:15:01Z</dc:date>	</item>	<item rdf:about="http://arxiv.org/pdf/1301.3781.pdf">		<title>[1301.3781&#93; Efficient Estimation of Word Representations in Vector Space</title>		<link>http://arxiv.org/pdf/1301.3781.pdf</link>		<description>We propose two novel model architectures for computing continuous vector representations of words from very large data sets. The quality of these representations is measured in a word similarity task, and the results are compared to the previously best performing techniques based on different types of neural networks. We observe large improvements in accuracy at much lower computational cost, i.e. it takes less than a day to learn high quality word vectors from a 1.6 billion words data set. Furthermore, we show that these vectors provide state-of-the-art performance on our test set for measuring syntactic and semantic word similarities.
		</description>		<dc:date>2016-01-13T23:07:45Z</dc:date>	</item>	<item rdf:about="http://www.nature.com/news/game-playing-software-holds-lessons-for-neuroscience-1.16979">		<title>Game-playing software holds lessons for neuroscience : Nature News &amp; Comment</title>		<link>http://www.nature.com/news/game-playing-software-holds-lessons-for-neuroscience-1.16979</link>		<dc:date>2016-01-12T18:33:23Z</dc:date>	</item>	<item rdf:about="http://robohub.org/how-friendly-is-your-ai-it-depends-on-the-rewards/">		<title>How friendly is your AI? It depends on the rewards | Robohub</title>		<link>http://robohub.org/how-friendly-is-your-ai-it-depends-on-the-rewards/</link>		<dc:date>2016-01-09T00:50:37Z</dc:date>	</item>	<item rdf:about="http://www.kdnuggets.com/2015/12/tensor-flow-terrific-deep-learning-library.html">		<title>TensorFlow is Terrific – A Sober Take on Deep Learning Acceleration</title>		<link>http://www.kdnuggets.com/2015/12/tensor-flow-terrific-deep-learning-library.html</link>		<dc:date>2016-01-07T00:43:58Z</dc:date>	</item>	<item rdf:about="https://bcomposes.wordpress.com/2015/11/26/simple-end-to-end-tensorflow:-examples/?utm_content=buffer46554&amp;utm_medium=social&amp;utm_source=twitter.com&amp;utm_campaign=buffer">		<title>Simple end-to-end TensorFlow examples | Bcomposes</title>		<link>https://bcomposes.wordpress.com/2015/11/26/simple-end-to-end-tensorflow:-examples/?utm_content=buffer46554&amp;utm_medium=social&amp;utm_source=twitter.com&amp;utm_campaign=buffer</link>		<dc:date>2015-12-21T19:05:46Z</dc:date>	</item>	<item rdf:about="http://googleresearch.blogspot.fr/2015/11/tensorflow-googles-latest-machine_9.html?m=1">		<title>Research Blog: TensorFlow - Google’s latest machine learning system, open sourced for everyone</title>		<link>http://googleresearch.blogspot.fr/2015/11/tensorflow-googles-latest-machine_9.html?m=1</link>		<dc:date>2015-11-09T18:52:15Z</dc:date>	</item>	<item 
rdf:about="http://download.tensorflow.org/paper/whitepaper2015.pdf">		<title>TensorFlow: Large-Scale Machine Learning on Heterogeneous Distributed Systems</title>		<link>http://download.tensorflow.org/paper/whitepaper2015.pdf</link>		<dc:date>2015-11-09T18:49:56Z</dc:date>	</item>	<item rdf:about="http://robohub.org/artificial-general-intelligence-that-plays-atari-video-games-how-did-deepmind-do-it/">		<title>Artificial General Intelligence that plays Atari video games: How did DeepMind do it? | Robohub</title>		<link>http://robohub.org/artificial-general-intelligence-that-plays-atari-video-games-how-did-deepmind-do-it/</link>		<dc:date>2014-09-26T16:38:02Z</dc:date>	</item>	<item rdf:about="http://recode.net/2014/05/27/googles-new-self-driving-car-ditches-the-steering-wheel/">		<title>Google Introduces New Self Driving Car at the Code Conference | Re/code</title>		<link>http://recode.net/2014/05/27/googles-new-self-driving-car-ditches-the-steering-wheel/</link>		<dc:date>2014-05-28T13:24:24Z</dc:date>	</item>	<item rdf:about="http://vancouverdata.blogspot.fr/2012/08/googles-self-driving-cars-are-going-to.html">		<title>Google’s Self-Driving Cars Are Going to Change Everything (Vancouver Data Blog by Neil McGuigan)</title>		<link>http://vancouverdata.blogspot.fr/2012/08/googles-self-driving-cars-are-going-to.html</link>		<dc:date>2013-09-05T11:47:43Z</dc:date>	</item>	<item rdf:about="http://www.lemonde.fr/technologies/article/2013/08/26/google-investit-dans-le-service-de-taxis-uber_3466504_651865.html#">		<title>Google investit dans le service de taxis Uber</title>		<link>http://www.lemonde.fr/technologies/article/2013/08/26/google-investit-dans-le-service-de-taxis-uber_3466504_651865.html#</link>		<dc:date>2013-08-26T23:28:51Z</dc:date>	</item>	<item rdf:about="http://www.newyorker.com/online/blogs/newsdesk/2012/11/google-driverless-car-morality.html">		<title>Google’s Driver-less Car and Morality : The New Yorker</title>		
<link>http://www.newyorker.com/online/blogs/newsdesk/2012/11/google-driverless-car-morality.html</link>		<description>“Ethical subroutines” may sound like science fiction, but once upon a time, so did self-driving cars.		</description>		<dc:date>2012-11-30T22:26:18Z</dc:date>	</item>	<item rdf:about="http://googleresearch.blogspot.co.uk/2012/05/from-words-to-concepts-and-back.html">		<title>From Words to Concepts and Back: Dictionaries for Linking Text, Entities and Ideas</title>		<link>http://googleresearch.blogspot.co.uk/2012/05/from-words-to-concepts-and-back.html</link>		<dc:date>2012-05-22T12:04:25Z</dc:date>	</item>	<item rdf:about="http://blog.outer-court.com/archive/2005-05-22-n83.html">		<title>Google Translator: The Universal Language</title>		<link>http://blog.outer-court.com/archive/2005-05-22-n83.html</link>		<dc:date>2005-05-31</dc:date>	</item></rdf:RDF>