{"id":515,"date":"2021-08-09T18:41:13","date_gmt":"2021-08-10T02:41:13","guid":{"rendered":"https:\/\/blog.ligai.cn\/?p=515"},"modified":"2021-09-07T18:52:24","modified_gmt":"2021-09-08T02:52:24","slug":"%e6%a8%a1%e5%9e%8b%e8%ae%ad%e7%bb%83%e8%af%ad%e6%96%99%e5%b0%91%ef%bc%9f%e7%9f%a5%e8%af%86%e8%92%b8%e9%a6%8f%e8%a7%a3%e5%86%b3%e7%83%a6%e6%81%bc","status":"publish","type":"post","link":"https:\/\/ligai.cn\/blog\/sharing\/515.html","title":{"rendered":"\u6a21\u578b\u8bad\u7ec3\u8bed\u6599\u5c11\uff1f\u77e5\u8bc6\u84b8\u998f\u89e3\u51b3\u70e6\u607c"},"content":{"rendered":"\n<p>\u4f5c\u4e3a\u6df1\u5ea6\u5b66\u4e60\u7684\u57fa\u7840\uff0c\u4eba\u5de5\u795e\u7ecf\u7f51\u7edc\u6a21\u578b\u4e00\u76f4\u662f\u4eba\u5de5\u667a\u80fd\u7684\u7814\u7a76\u70ed\u70b9\uff0c\u4e5f\u5728\u5404\u4e2a\u9886\u57df\u53d1\u6325\u7740\u91cd\u8981\u7684\u4f5c\u7528\u3002\u597d\u7684\u6a21\u578b\u5f80\u5f80\u4f34\u968f\u7740\u5e9e\u5927\u7684\u4f53\u79ef\u548c\u6d77\u91cf\u7684\u53c2\u6570\uff0c\u8fd9\u4e0d\u5229\u4e8e\u6a21\u578b\u7684\u590d\u73b0\u548c\u90e8\u7f72\u3002<\/p>\n\n\n\n<p><\/p>\n\n\n\n<p>\u62ff\u81ea\u7136\u8bed\u8a00\u5904\u7406\u9886\u57df\uff08NLP\uff09\u4e2d\u6700\u5e38\u89c1\u7684\u8bcd\u5411\u91cf\u6a21\u578b\u6765\u4e3e\u4f8b\u3002\u60f3\u8981\u8bad\u7ec3\u4e00\u4e2a\u9ad8\u8d28\u91cf\u7684\u8bcd\u5411\u91cf\u6a21\u578b\uff0c\u5c31\u9700\u8981\u5927\u91cf\u4f18\u8d28\u7684\u8bed\u6599\u6765\u652f\u6301\u3002\u5bf9\u4e8e\u4f7f\u7528\u5e7f\u6cdb\u7684\u8bed\u79cd\uff0c\u6bd4\u5982\u82f1\u8bed\uff0c\u83b7\u53d6\u8bed\u6599\u548c\u8bad\u7ec3\u6a21\u578b\u76f8\u5bf9\u5bb9\u6613\u3002\u53cd\u89c2\u4e00\u4e9b\u5c0f\u8bed\u79cd\uff0c\u8bed\u6599\u5c11\uff0c\u83b7\u53d6\u96be\uff0c\u7eb5\u6709\u518d\u597d\u7684\u7f51\u7edc\u7ed3\u6784\uff0c\u8bad\u7ec3\u51fa\u6765\u7684\u6a21\u578b\u6027\u80fd\u4e5f\u4e0d\u5c3d\u4eba\u610f\u3002<\/p>\n\n\n\n<p><span style=\"color:#4c84ff\" class=\"has-inline-color\">\u96be\u9053\u5c0f\u8bed\u79cd\u7684NLP\u7814\u7a76\u5c31\u6ca1\u529e\u6cd5\u63a8\u8fdb\u4e0b\u53bb\u4e86\u5417\uff1f<\/span><\/p>\n\n\n\n<div style=\"height:14px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<h1>\u77e5\u8bc6\u84b8\u998f<\/h1>\n\n\n\n<p>\u77e5\u8bc6\u84b8\u998f\u7b97\u6cd5\u4e3a\u89e3\u51b3\u8fd9\u7c7b\u95ee\u9898\u63d0\u4f9b\u4e86\u4e00\u79cd\u65b0\u7684\u601d\u8def\u3002\u5728\u5316\u5b66\u4e2d\uff0c\u84b8\u998f\u662f\u4e00\u4e2a\u6709\u6548\u7684\u5206\u79bb\u6cb8\u70b9\u4e0d\u540c\u7684\u7ec4\u5206\u7684\u65b9\u6cd5\uff0c\u540c\u7406\uff0c\u7b80\u5355\u5730\u7406\u89e3\u77e5\u8bc6\u84b8\u998f\uff0c\u5b83\u80fd\u5c06\u590d\u6742\u6a21\u578b\uff08Teacher\uff09\u4e2d\u7684\u77e5\u8bc6\u63d0\u53d6\u51fa\u6765\uff0c\u8fc1\u79fb\u5230\u53e6\u4e00\u4e2a\u8f7b\u91cf\u7ea7\u6a21\u578b\uff08Student\uff09\uff0c\u8fbe\u5230\u538b\u7f29\u6a21\u578b\u7684\u76ee\u7684\u3002\u8fd9\u79cd\u505a\u6cd5\u65e2\u51cf\u5c11\u4e86\u5bf9\u786c\u4ef6\u7684\u8981\u6c42\uff0c\u7f29\u77ed\u4e86\u8ba1\u7b97\u65f6\u95f4\uff0c\u53c8\u80fd\u5b66\u5230\u590d\u6742\u6a21\u578b\u4e2d\u7684\u6cdb\u5316\u80fd\u529b\uff0c\u5b9e\u73b0\u8fd1\u4f3c\u539f\u6a21\u578b\u7684\u6548\u679c\u3002<\/p>\n\n\n\n<p><\/p>\n\n\n\n<figure class=\"wp-block-image size-large\"><img loading=\"lazy\" width=\"1024\" height=\"430\" src=\"https:\/\/blog.ligai.cn\/wp-content\/uploads\/2021\/08\/R-C-1024x430.png\" alt=\"\" class=\"wp-image-516\" srcset=\"https:\/\/ligai.cn\/blog\/wp-content\/uploads\/2021\/08\/R-C-1024x430.png 1024w, https:\/\/ligai.cn\/blog\/wp-content\/uploads\/2021\/08\/R-C-300x126.png 300w, https:\/\/ligai.cn\/blog\/wp-content\/uploads\/2021\/08\/R-C-768x322.png 768w, https:\/\/ligai.cn\/blog\/wp-content\/uploads\/2021\/08\/R-C.png 1080w\" sizes=\"(max-width: 1024px) 100vw, 1024px\" \/><\/figure>\n\n\n\n<p>\u901a\u8fc7\u8fd9\u79cd\u65b9\u6cd5\uff0c\u5c0f\u8bed\u79cd\u7684\u8bcd\u5411\u91cf\u6a21\u578b\u4e5f\u80fd\u50cf\u82f1\u8bed\u8bcd\u5411\u91cf\u6a21\u578b\u4e00\u6837\u51c6\u786e\u3002<\/p>\n\n\n\n<p>\u63a5\u4e0b\u6765\u8ba9\u6211\u4eec\u770b\u770b\uff0c\u5c0f\u6a21\u578b\u662f\u5982\u4f55\u89e3\u51b3\u5927\u95ee\u9898\u7684~<\/p>\n\n\n\n<div style=\"height:15px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<h1>\u8bcd\u5411\u91cf\u662f\u4ec0\u4e48<\/h1>\n\n\n\n<p>\u4e00\u5207\u81ea\u7136\u8bed\u8a00\u5904\u7406\u4efb\u52a1\u90fd\u59cb\u4e8e\u6587\u672c\u5728\u8ba1\u7b97\u673a\u4e2d\u7684\u8868\u793a\u3002\u968f\u7740\u6587\u672c\u8868\u793a\u65b9\u6cd5\u7684\u4e0d\u65ad\u6f14\u53d8\uff0c\u76ee\u524d\u6700\u70ed\u95e8\u7684\u65b9\u6cd5\u5f53\u5c5e\u8bcd\u5411\u91cf\u6a21\u578b\u3002<span style=\"color:#4c84ff\" class=\"has-inline-color\">\u5b83\u7684\u6838\u5fc3\u601d\u60f3\u662f\u901a\u8fc7\u5927\u91cf\u7684\u8bed\u6599\u8bad\u7ec3\uff0c\u5c06\u8bcd\u8f6c\u5316\u4e3a\u7a20\u5bc6\u7684\u5411\u91cf\uff0c\u6620\u5c04\u5230\u5411\u91cf\u7a7a\u95f4\u4e2d\uff0c\u5e76\u4e14\u5bf9\u4e8e\u610f\u601d\u76f8\u8fd1\u7684\u8bcd\uff0c\u5b83\u4eec\u5bf9\u5e94\u7684\u5411\u91cf\u4e5f\u76f8\u8fd1\u3002<\/span><\/p>\n\n\n\n<p><\/p>\n\n\n\n<p>\u00a0\u51c6\u786e\u7684\u8bcd\u5411\u91cf\u6a21\u578b\u662f\u5176\u4ed6\u4e0b\u6e38\u4efb\u52a1\uff08\u6587\u672c\u76f8\u4f3c\u5ea6\u3001\u60c5\u611f\u5206\u7c7b\u3001\u4fe1\u606f\u62bd\u53d6\u7b49\uff09\u7684\u57fa\u7840\u548c\u4fdd\u8bc1\u3002\u60f3\u8981\u83b7\u5f97\u4f18\u8d28\u7684\u8bcd\u5411\u91cf\u6a21\u578b\uff0c\u9700\u8981\u5927\u91cf\u9ad8\u8d28\u91cf\u7684\u8bed\u6599\u7684\u652f\u6301\u3002\u7136\u800c\uff0c\u7edd\u5927\u591a\u6570\u73b0\u6709\u7684\u8bcd\u5411\u91cf\u65b9\u6cd5\u90fd\u6709\u4e00\u4e2a\u5171\u540c\u70b9\u2014\u2014\u5b83\u4eec\u53ea\u652f\u6301\u5355\u8bed\u8a00\uff0c\u901a\u5e38\u662f\u82f1\u8bed\u3002\u8fd9\u4f7f\u5f97\u5b83\u4eec\u65e0\u6cd5\u76f4\u63a5\u5e94\u7528\u5230\u5176\u4ed6\u8bed\u8a00\u573a\u666f\u4e2d\u3002<\/p>\n\n\n\n<p><\/p>\n\n\n\n<div style=\"height:14px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"662\" height=\"223\" src=\"https:\/\/blog.ligai.cn\/wp-content\/uploads\/2021\/08\/diagram-20190710.png\" alt=\"\" class=\"wp-image-517\" srcset=\"https:\/\/ligai.cn\/blog\/wp-content\/uploads\/2021\/08\/diagram-20190710.png 662w, https:\/\/ligai.cn\/blog\/wp-content\/uploads\/2021\/08\/diagram-20190710-300x101.png 300w\" sizes=\"(max-width: 662px) 100vw, 662px\" \/><\/figure>\n\n\n\n<p><\/p>\n\n\n\n<p>\u4e0a\u56fe\u5c55\u793a\u4e86\u4e00\u4e2a\u8bcd\u5411\u91cf\u7684\u7b80\u5355\u4f8b\u5b50\uff1a<br>\u8bcd\u5411\u91cf\u6a21\u578b\u53ef\u4ee5\u5c06\u201cking\u201d\uff0c\u201cqueen\u201d\uff0c\u201cman\u201d\uff0c\u201cwoman\u201d\u6620\u5c04\u5230\u4e00\u4e2a\u5411\u91cf\u7a7a\u95f4\u4e2d\u53bb\uff0c\u901a\u8fc7\u5411\u91cf\u7684\u57fa\u672c\u8fd0\u7b97\uff08\u4f8b\u5982\u76f8\u52a0\u548c\u76f8\u51cf\uff09\uff0c\u6211\u4eec\u53ef\u4ee5\u8ba1\u7b97\u51fa\u8fd9\u4e9b\u8bcd\u4e4b\u95f4\u7684\u8bed\u4e49\u5173\u7cfb\u3002\u5728\u8fd9\u4e2a\u4f8b\u5b50\u4e2d\uff0cqueen=king-man+woman<\/p>\n\n\n\n<div style=\"height:15px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<h1>\u8bcd\u5411\u91cf+\u77e5\u8bc6\u84b8\u998f=\uff1f<\/h1>\n\n\n\n<p>\u8fd1\u5e74\u6765\uff0c\u591a\u8bed\u79cd\u7684\u6587\u672c\u8868\u793a\u65b9\u6cd5\u53d7\u5230\u4e86\u5e7f\u6cdb\u7684\u5173\u6ce8\uff0c\u4eba\u4eec\u63d0\u51fa\u4e86\u5f88\u591a\u65b9\u6848\u8bd5\u56fe\u89e3\u51b3\u8fd9\u4e2a\u95ee\u9898\u3002\u5e38\u89c1\u7684\u65b9\u6cd5\u5c31\u662f\u5206\u522b\u7528\u6bcf\u79cd\u8bed\u8a00\u6765\u8bad\u7ec3\u5bf9\u5e94\u7684\u8bcd\u5411\u91cf\u6a21\u578b\uff0c\u4f46\u7531\u4e8e\u6709\u4e9b\u8bed\u79cd\u7684\u8bed\u6599\u5f88\u5c11\uff0c\u96be\u4ee5\u6ee1\u8db3\u6a21\u578b\u8bad\u7ec3\u7684\u8981\u6c42\u3002\u00a0<\/p>\n\n\n\n<p><\/p>\n\n\n\n<p>\u66fe\u6709\u5b66\u8005\u63d0\u51fa\u4f7f\u7528\u7ecf\u5178\u7684\u57fa\u4e8esequence-to-sequence\u7684encoder-decoder\u6846\u67b6\uff0c\u9996\u5148\u5b9e\u73b0\u4ece\u4e00\u79cd\u8bed\u8a00\u5230\u53e6\u4e00\u79cd\u8bed\u8a00\u7684\u673a\u5668\u7ffb\u8bd1\u6a21\u578b\uff0c\u7136\u540e\u5c06encoder\u5c42\u7684\u8f93\u51fa\u4f5c\u4e3a\u53e5\u5b50\u7684\u5411\u91cf\uff08LASER\u6a21\u578b\uff09\u3002<\/p>\n\n\n\n<p><\/p>\n\n\n\n<p>\u8fd9\u79cd\u6a21\u578b\u53ef\u4ee5\u5f88\u597d\u5730\u8bc6\u522b\u51fa\u88ab\u51c6\u786e\u7ffb\u8bd1\u6210\u4e0d\u540c\u8bed\u8a00\u7684\u53e5\u5b50\uff0c\u4f46\u662f\u5bf9\u4e8e\u6ca1\u6709\u88ab\u51c6\u786e\u7ffb\u8bd1\u7684\u53e5\u5b50\uff0c\u8be5\u6a21\u578b\u5f88\u96be\u8bc4\u4f30\u4ed6\u4eec\u7684\u76f8\u4f3c\u5ea6\u3002\u53e6\u5916\uff0c\u8bad\u7ec3\u8fd9\u79cd\u6a21\u578b\u4e5f\u9700\u8981\u6781\u9ad8\u7684\u786c\u4ef6\u6761\u4ef6\u652f\u6301\uff0c\u5f97\u5230\u4e00\u4e2a\u652f\u630193\u79cd\u8bed\u8a00\u7684\u8bcd\u5411\u91cf\u6a21\u578b\uff0c\u5927\u7ea6\u9700\u8981\u572816\u5757NVIDIA V100 GPUs\u4e0a\u8bad\u7ec35\u5929\u65f6\u95f4\u3002<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"892\" height=\"541\" src=\"https:\/\/blog.ligai.cn\/wp-content\/uploads\/2021\/08\/R-C-1.png\" alt=\"\" class=\"wp-image-518\" srcset=\"https:\/\/ligai.cn\/blog\/wp-content\/uploads\/2021\/08\/R-C-1.png 892w, https:\/\/ligai.cn\/blog\/wp-content\/uploads\/2021\/08\/R-C-1-300x182.png 300w, https:\/\/ligai.cn\/blog\/wp-content\/uploads\/2021\/08\/R-C-1-768x466.png 768w\" sizes=\"(max-width: 892px) 100vw, 892px\" \/><\/figure>\n\n\n\n<p><\/p>\n\n\n\n<p>\u6700\u8fd1\uff0c\u6709\u76f8\u5173\u7814\u7a76\u4eba\u5458\u60f3\u5230\u4e86\u4e00\u4e2aidea\u6765\u89e3\u51b3\u8fd9\u4e2a\u95ee\u9898\uff1a\u5bf9\u4e8e\u610f\u601d\u76f8\u540c\uff0c\u4f46\u662f\u7ffb\u8bd1\u6210\u4e86\u4e0d\u540c\u8bed\u8a00\u7684\u53e5\u5b50\uff0c\u4ed6\u4eec\u6620\u5c04\u5230\u5411\u91cf\u7a7a\u95f4\u4e2d\u7684\u4f4d\u7f6e\u5e94\u8be5\u662f\u4e00\u81f4\u7684\u3002\u57fa\u4e8e\u6b64\uff0c\u4ed6\u63d0\u51fa\u4f7f\u7528\u77e5\u8bc6\u84b8\u998f\u7684\u65b9\u6cd5\uff0c\u5c06\u73b0\u6709\u7684\u5355\u8bed\u8a00\u8bcd\u5411\u91cf\u6a21\u578b\u6269\u5c55\u5230\u65b0\u7684\u8bed\u8a00\u4e0a\u53bb\u3002<\/p>\n\n\n\n<div style=\"height:13px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"554\" height=\"203\" src=\"https:\/\/blog.ligai.cn\/wp-content\/uploads\/2021\/08\/image3-1.png\" alt=\"\" class=\"wp-image-519\" srcset=\"https:\/\/ligai.cn\/blog\/wp-content\/uploads\/2021\/08\/image3-1.png 554w, https:\/\/ligai.cn\/blog\/wp-content\/uploads\/2021\/08\/image3-1-300x110.png 300w\" sizes=\"(max-width: 554px) 100vw, 554px\" \/><\/figure>\n\n\n\n<p><strong>\u6574\u4f53\u7684\u89e3\u51b3\u601d\u8def\u5982\u4e0b\uff1a<\/strong><\/p>\n\n\n\n<p>\u6211\u4eec\u9700\u8981\u4e00\u4e2a\u6e90\u8bed\u8a00\u7684\u8bcd\u5411\u91cf\u6a21\u578b\u4f5c\u4e3ateacher model\uff0c\u4e00\u4e2a\u4ece\u6e90\u8bed\u8a00s\u5230\u76ee\u6807\u8bed\u8a00t\u7684\u5e73\u884c\u8bed\u6599\u5e93((s<sub>1<\/sub>,t<sub>1<\/sub>\u00a0), \u2026 ,(s<sub>n\u00a0<\/sub>,\u00a0t<sub>n<\/sub>))\uff0c\u503c\u5f97\u6ce8\u610f\u7684\u662f\uff0ct<sub>i<\/sub>\u53ef\u4ee5\u662f\u4e0d\u540c\u7684\u8bed\u79cd\u3002\u7136\u540e\uff0c\u4f7f\u7528\u5747\u65b9\u8bef\u5dee\u4f5c\u4e3aloss\uff0c\u8bad\u7ec3\u4e00\u4e2astudent model\uff0c\u4f7f\u5f97student model\u7684\u8f93\u51fa\u5c3d\u53ef\u80fd\u5730\u9760\u8fd1teacher model\u3002\u00a0<\/p>\n\n\n\n<div style=\"height:14px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<p><\/p>\n\n\n\n<p>\u901a\u8fc7\u8fd9\u6837\u7684\u65b9\u5f0f\uff0cstudent model\u53ef\u4ee5\u5f88\u597d\u5730\u5b66\u4e60\u5230teacher model\u7684\u6cdb\u5316\u80fd\u529b\uff0c\u5e76\u4e14\u62e5\u6709\u4e24\u6761\u975e\u5e38\u91cd\u8981\u7684\u5c5e\u6027\uff1a<\/p>\n\n\n\n<p><strong>A. \u8de8\u8bed\u8a00\u7684\u5411\u91cf\u7a7a\u95f4\u4e5f\u5448\u73b0\u51fa\u4e00\u81f4\u6027\u3002\u6362\u8a00\u4e4b\uff0c\u7531\u540c\u4e00\u4e2a\u53e5\u5b50\u7ffb\u8bd1\u6210\u7684\u4e0d\u540c\u8bed\u8a00\uff0c\u5b83\u4eec\u7684\u5411\u91cf\u662f\u975e\u5e38\u63a5\u8fd1\u7684\uff1b<\/strong><\/p>\n\n\n\n<p><strong>B. teacher model\u4e2d\u6240\u5305\u542b\u7684\u5411\u91cf\u7a7a\u95f4\u7684\u5c5e\u6027\u80fd\u88ab\u5f88\u597d\u5730\u8f6c\u79fb\u5230\u5176\u4ed6\u8bed\u79cd\u4e2d\u53bb\u3002<\/strong>\u00a0<\/p>\n\n\n\n<div style=\"height:14px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<p><\/p>\n\n\n\n<p>\u76f8\u6bd4\u4e8e\u5176\u4ed6\u7684\u8bad\u7ec3\u591a\u8bed\u8a00\u8bcd\u5411\u91cf\u6a21\u578b\u7684\u65b9\u6cd5\uff0c\u8fd9\u79cd\u65b9\u6cd5\u6709\u5982\u4e0b\u51e0\u4e2a\u4f18\u70b9\uff1a\u200b<\/p>\n\n\n\n<p><strong>A. \u4ec5\u4ec5\u53ea\u9700\u8981\u975e\u5e38\u5c11\u91cf\u7684\u6837\u672c\uff0c\u5c31\u53ef\u4ee5\u5c06\u5df2\u6709\u7684\u6a21\u578b\u6269\u5c55\u5230\u65b0\u7684\u8bed\u8a00\u4e0a\u53bb\uff1b<\/strong><\/p>\n\n\n\n<p><strong>B. \u5b83\u53ef\u4ee5\u66f4\u5bb9\u6613\u5730\u5728\u5411\u91cf\u7a7a\u95f4\u4e2d\u786e\u4fdd\u8fbe\u5230\u9884\u671f\u7684\u6027\u80fd\uff1b<\/strong><\/p>\n\n\n\n<p><strong>C. \u6574\u4e2a\u8bad\u7ec3\u8fc7\u7a0b\u5bf9\u786c\u4ef6\u7684\u8981\u6c42\u975e\u5e38\u4f4e\u3002<\/strong><\/p>\n\n\n\n<div style=\"height:14px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<p><\/p>\n\n\n\n<p>\u4e3a\u4e86\u9a8c\u8bc1\u60f3\u6cd5\u7684\u53ef\u884c\u6027\uff0c\u76f8\u5173\u79d1\u7814\u4eba\u5458\u505a\u4e86\u4ee5\u4e0b\u7684\u5b9e\u9a8c\uff1a<\/p>\n\n\n\n<p><\/p>\n\n\n\n<p><strong>\u5728\u6a21\u578b\u7684\u9009\u62e9\u65b9\u9762<\/strong>\uff0c\u4e3b\u8981\u4f7f\u7528\u82f1\u8bed\u7684SBERT\u6a21\u578b\u4f5c\u4e3ateacher model\uff0c\u4f7f\u7528XLM-RoBERTa\uff08XLM-R\uff09\u4f5c\u4e3astudent model\u3002\u82f1\u8bed\u7684SBERT\u6a21\u578b\u8bcd\u5178\u5305\u542b\u4e8630k\u7684\u82f1\u8bedtokens\uff0c\u5728\u591a\u4e2a\u53e5\u5411\u91cf\u4efb\u52a1\u4e2d\u53d6\u5f97\u4e86state-of-the-art\u7684\u6548\u679c\u3002XLM-R\u4f7f\u7528\u4e86SentencePiece\u4f5c\u4e3a\u6587\u672c\u5206\u8bcd\u5668\uff0c\u5f88\u597d\u5730\u56de\u907f\u4e86\u67d0\u4e9b\u8bed\u79cd\u6240\u9700\u8981\u7684\u7279\u6b8a\u9884\u5904\u7406\uff0c\u53ef\u4ee5\u76f4\u63a5\u5e94\u7528\u5230\u6240\u6709\u8bed\u8a00\u7684\u539f\u59cb\u6587\u672c\u4e0a\u53bb\uff0c\u6b64\u5916\uff0c\u5b83\u7684\u8bcd\u5178\u5305\u542b\u6765\u81ea\u4e00\u767e\u591a\u79cd\u8bed\u8a00\u7684250k\u7684tokens\uff0c\u975e\u5e38\u9002\u5408\u505a\u591a\u8bed\u8a00\u8bcd\u5411\u91cf\u6a21\u578b\u3002<\/p>\n\n\n\n<p><\/p>\n\n\n\n<p><strong>\u5728\u8bad\u7ec3\u6570\u636e\u65b9\u9762<\/strong>\uff0c\u4e3b\u8981\u4f7f\u7528\u4e86\u4ee5\u4e0b\u7684\u6570\u636e\u96c6\uff1a<\/p>\n\n\n\n<figure class=\"wp-block-image size-full\"><img loading=\"lazy\" width=\"918\" height=\"119\" src=\"https:\/\/blog.ligai.cn\/wp-content\/uploads\/2021\/08\/Snipaste_2021-08-10_10-00-21.png\" alt=\"\" class=\"wp-image-521\" srcset=\"https:\/\/ligai.cn\/blog\/wp-content\/uploads\/2021\/08\/Snipaste_2021-08-10_10-00-21.png 918w, https:\/\/ligai.cn\/blog\/wp-content\/uploads\/2021\/08\/Snipaste_2021-08-10_10-00-21-300x39.png 300w, https:\/\/ligai.cn\/blog\/wp-content\/uploads\/2021\/08\/Snipaste_2021-08-10_10-00-21-768x100.png 768w\" sizes=\"(max-width: 918px) 100vw, 918px\" \/><figcaption><em>Table1. Data set\u00a0in this experiment<\/em><\/figcaption><\/figure>\n\n\n\n<p>\u5bf9\u4e8e\u67d0\u4e9b\u8d44\u6e90\u8f83\u5c11\u7684\u8bed\u8a00\uff0c\u83b7\u53d6\u5e73\u884c\u8bed\u6599\u662f\u5f88\u56f0\u96be\u7684\uff0c\u56e0\u6b64\u4e5f\u4f7f\u7528\u4e86\u4e00\u4e9b\u53cc\u8bed\u8bcd\u5178\u6765\u6269\u5145\u8bed\u6599\uff0c\u672c\u6b21\u5b9e\u9a8c\u4e3b\u8981\u4f7f\u7528\u5230\u7684\u8bcd\u5178\u6709\u4ee5\u4e0b\u4e24\u4e2a\uff1a<\/p>\n\n\n\n<p>\u2022 MUSE<\/p>\n\n\n\n<p>\u2022 Wikititles\u00a0<\/p>\n\n\n\n<p><\/p>\n\n\n\n<p>\u4e3a\u4e86\u9a8c\u8bc1\u6a21\u578b\u6548\u679c\uff0c\u7814\u7a76\u4eba\u5458\u4e3b\u8981\u5728Multi- and Cross-lingual Semantic Textual Similarity (STS)\uff0cBitext Retrieval\u548cCross-lingual Similarity Search\u4e09\u4e2a\u4efb\u52a1\u4e0a\u8fdb\u884c\u4e86\u5b9e\u9a8c\u3002\u00a0<\/p>\n\n\n\n<p><\/p>\n\n\n\n<p><strong>STS\u4efb\u52a1\u7684\u4e3b\u8981\u76ee\u6807\u662f\u4e3a\u4e00\u5bf9\u53e5\u5b50\u8f93\u51fa\u4e00\u4e2a\u503c\u6765\u53cd\u6620\u5b83\u4eec\u4e4b\u95f4\u7684\u8bed\u4e49\u76f8\u4f3c\u5ea6\u3002<\/strong>\u4f8b\u5982\uff0c\u53ef\u4ee5\u75280\u5206\u8868\u793a\u4e24\u4e2a\u53e5\u5b50\u5b8c\u5168\u65e0\u5173\uff0c5\u5206\u8868\u793a\u4e24\u4e2a\u53e5\u5b50\u7684\u8bed\u4e49\u5b8c\u5168\u4e00\u81f4\u3002<br>\u5728\u516c\u5f00\u6570\u636e\u96c6STS 2017 dataset\u4e0a\uff0c\u4f5c\u8005\u4f7f\u7528\u65af\u76ae\u5c14\u66fc\u7b49\u7ea7\u76f8\u5173\u7cfb\u6570\u6765\u8bc4\u4f30\u4e0d\u540c\u6a21\u578b\u7684\u6548\u679c\u3002\u5c06\u591a\u4e2a\u8bed\u79cd\u7684\u5f97\u5206\u53d6\u5e73\u5747\u503c\uff0c\u6211\u4eec\u53ef\u4ee5\u53d1\u73b0\uff0c\u84b8\u998f\u6a21\u578bXLM-R\u2190SBERT-paraphrases\u7684\u7ed3\u679c\uff0883.7\uff09\u4f18\u4e8e\u5176\u4ed6\u4f20\u7edf\u7b97\u6cd5\u6a21\u578bLASER\uff0867.0\uff09\uff0cmUSE\uff0881.1\uff09\uff0cLaBSE\uff0873.5\uff09\u3002\u00a0<\/p>\n\n\n\n<p><\/p>\n\n\n\n<p><strong>Bitext Retrieval\u4efb\u52a1\u65e8\u5728\u4ece\u4e24\u4e2a\u4e0d\u540c\u8bed\u79cd\u7684\u8bed\u6599\u5e93\u4e2d\u8bc6\u522b\u51fa\u4e92\u4e3a\u7ffb\u8bd1\u7684\u53e5\u5b50\u5bf9\u3002<\/strong>\u4f5c\u8005\u4f7f\u7528BUCC mining task\u4e2d\u7684\u6570\u636e\u96c6\u6765\u8ba1\u7b97\u6a21\u578b\u7684F<sub>1<\/sub>\u503c\uff0c\u84b8\u998f\u6a21\u578bXLM-R\u2190SBERT-paraphrases\u7684\u7ed3\u679c\uff0888.6\uff09\u548c\u5176\u4ed6\u4f20\u7edf\u7b97\u6cd5\u6a21\u578bLASER\uff0893.0\uff09\uff0cmUSE\uff0887.7\uff09\uff0cLaBSE\uff0893.5\uff09\u4e92\u6709\u9ad8\u4f4e\u3002\u7531\u4e8e\u84b8\u998f\u6a21\u578b\u4f1a\u628a\u67d0\u4e9b\u8bed\u4e49\u76f8\u4f3c\u4f46\u662f\u7f3a\u5931\u90e8\u5206\u7ec6\u8282\u5143\u7d20\uff08\u4f8b\u5982\u65e5\u671f\uff0c\u5730\u70b9\u7b49\uff09\u7684\u53e5\u5b50\u4e5f\u5224\u65ad\u6210\u4e92\u4e3a\u7ffb\u8bd1\u7684\u53e5\u5b50\u5bf9\uff0c\u6240\u4ee5\u5728\u8fd9\u4e2a\u4efb\u52a1\u4e0b\uff0c\u5b83\u7684\u7ed3\u679c\u4e0d\u662f\u6700\u597d\u7684\u3002\u4f46\u662f\u8fd9\u4ecd\u7136\u80fd\u591f\u8bf4\u660e\uff0c\u5b83\u975e\u5e38\u9002\u5408\u5bfb\u627e\u8bed\u4e49\u76f8\u4f3c\u7684\u53e5\u5b50\u5bf9\u3002\u00a0<\/p>\n\n\n\n<div style=\"height:13px\" aria-hidden=\"true\" class=\"wp-block-spacer\"><\/div>\n\n\n\n<p><strong>\u5173\u4e8eCross-lingual Similarity Search\u4efb\u52a1\uff0c\u4f5c\u8005\u9009\u62e9\u4e86\u5728Tatoeba\u6570\u636e\u5e93\u4e0a\u8fdb\u884c\u5b9e\u9a8c\u3002<\/strong>\u8fd9\u4e2a\u4efb\u52a1\u9700\u8981\u627e\u5230\u4e0e\u6240\u6709\u6e90\u8bed\u8a00\u7684\u53e5\u5b50\u6700\u76f8\u4f3c\u7684\u5176\u4ed6\u8bed\u8a00\u7684\u53e5\u5b50\uff0c\u7ed3\u679c\u5982\u4e0b\uff1a<\/p>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter size-full\"><img loading=\"lazy\" width=\"464\" height=\"216\" src=\"https:\/\/blog.ligai.cn\/wp-content\/uploads\/2021\/08\/image4.png\" alt=\"\" class=\"wp-image-522\" srcset=\"https:\/\/ligai.cn\/blog\/wp-content\/uploads\/2021\/08\/image4.png 464w, https:\/\/ligai.cn\/blog\/wp-content\/uploads\/2021\/08\/image4-300x140.png 300w\" sizes=\"(max-width: 464px) 100vw, 464px\" \/><figcaption><em>Table2. Tatoeba\u6d4b\u8bd5\u96c6\u4e2d\u7684\u51c6\u786e\u5ea6\u5b9e\u9a8c\u7ed3\u679c\uff08\u5305\u542b\u4ece\u82f1\u6587\u5230\u76ee\u6807\u8bed\u8a00\uff0c\u548c\u4ece\u76ee\u6807\u8bed\u8a00\u5230\u82f1\u6587\uff09<\/em><\/figcaption><\/figure><\/div>\n\n\n\n<p>\u53ef\u4ee5\u770b\u5230\u5728\u5c0f\u8bed\u79cd\u4e0a\uff0c\u84b8\u998f\u6a21\u578b\u7684\u6548\u679c\u8981\u660e\u663e\u597d\u4e8e\u4f20\u7edf\u6a21\u578b\u3002<\/p>\n\n\n\n<p><\/p>\n\n\n\n<p>\u00a0\u4eca\u5929\u548c\u5927\u5bb6\u5206\u4eab\u4e86\u4e00\u79cd\u57fa\u4e8e\u84b8\u998f\u601d\u60f3\u7684\u8bad\u7ec3\u8bcd\u5411\u91cf\u6a21\u578b\u7684\u65b9\u6cd5\uff0c\u53ef\u4ee5\u5b9e\u73b0\u5c06\u4e00\u4e2a\u9ad8\u8d28\u91cf\u7684\u5355\u8bed\u8a00\u6a21\u578b\u6269\u5c55\u5230\u5176\u4ed6\u8bed\u8a00\u4e0a\u53bb\u3002\u4ece\u5b9e\u9a8c\u7ed3\u679c\u6765\u770b\uff0c\u5bf9\u4e8e\u5e38\u89c1\u95ee\u9898\uff0c\u84b8\u998f\u6a21\u578b\u4e5f\u80fd\u6709\u4e0d\u9519\u7684\u6548\u679c\uff0c\u5728\u5c0f\u8bed\u79cd\u4e0a\u7684\u8868\u73b0\u5c24\u4e3a\u7a81\u51fa\uff0c\u540c\u65f6\u84b8\u998f\u6a21\u578b\u4e5f\u8282\u7701\u4e86\u5f88\u5927\u4e00\u90e8\u5206\u7684\u786c\u4ef6\u5f00\u652f\uff0c\u662f\u5f88\u503c\u5f97\u5c1d\u8bd5\u7684\u4e00\u79cd\u65b9\u6cd5\u3002<\/p>\n\n\n\n<p><br>\u540e\u7eed\u6211\u4eec\u8fd8\u4f1a\u5206\u4eab\u66f4\u591a\u76f8\u5173\u9886\u57df\u7684\u6587\u7ae0\uff0c\u671f\u5f85\u5927\u5bb6\u7684\u6301\u7eed\u5173\u6ce8~\u611f\u5174\u8da3\u7684\u5c0f\u4f19\u4f34\u53ef\u4ee5\u70b9\u51fb\u6211\u4eec\u7684\u5b98\u7f51 <a href=\"https:\/\/ligai.cn\/\">LigaAI-\u65b0\u4e00\u4ee3\u667a\u80fd\u7814\u53d1\u7ba1\u7406\u5e73\u53f0<\/a><\/p>\n\n\n\n<p><\/p>\n\n\n\n<p class=\"has-text-align-left\">\u53c2\u8003\u6587\u732e\uff1a<\/p>\n\n\n\n<p class=\"has-text-align-left\">[1]\u00a0 Reimers N ,\u00a0 Gurevych I . Making Monolingual Sentence Embeddings Multilingual using Knowledge Distillation[J].\u00a0 2020.<\/p>\n\n\n\n<p class=\"has-text-align-left\">[2]\u00a0 Artetxe M ,\u00a0 Schwenk H . Massively Multilingual Sentence Embeddings for Zero-Shot Cross-Lingual Transfer and Beyond[J].\u00a0 2019.<\/p>\n\n\n\n<p class=\"has-text-align-left\">[3]\u00a0 Chidambaram M ,\u00a0 Yang Y ,\u00a0 Cer D , et al. Learning Cross-Lingual Sentence Representations via a Multi-task Dual-Encoder Model[C]\/\/ Proceedings of the 4th Workshop on Representation Learning for NLP (RepL4NLP-2019). 2019.<\/p>\n\n\n\n<p class=\"has-text-align-left\">[4]\u00a0 Yang Y ,\u00a0 Cer D ,\u00a0 Ahmad A , et al. Multilingual Universal Sentence Encoder for Semantic Retrieval[J].\u00a0 2019.<\/p>\n\n\n\n<p class=\"has-text-align-left\">[5]\u00a0 Feng F ,\u00a0 Yang Y ,\u00a0 Cer D , et al. Language-agnostic BERT Sentence Embedding[J].\u00a0 2020.<\/p>\n\n\n\n<p class=\"has-text-align-left\">\u56fe\u6e90:https:\/\/www.ed.ac.uk\/informatics\/news-events\/stories\/2019\/king-man-woman-queen-the-hidden-algebraic-struct<\/p>\n","protected":false},"excerpt":{"rendered":"<p>\u4f5c\u4e3a\u6df1\u5ea6\u5b66\u4e60\u7684\u57fa\u7840\uff0c\u4eba\u5de5\u795e\u7ecf\u7f51\u7edc\u6a21\u578b\u4e00\u76f4\u662f\u4eba\u5de5\u667a\u80fd\u7684\u7814\u7a76\u70ed\u70b9\uff0c\u4e5f\u5728\u5404\u4e2a\u9886\u57df\u53d1\u6325\u7740\u91cd\u8981\u7684\u4f5c\u7528\u3002\u597d\u7684\u6a21\u578b\u5f80\u5f80\u4f34\u968f\u7740 &#8230; <a title=\"\u6a21\u578b\u8bad\u7ec3\u8bed\u6599\u5c11\uff1f\u77e5\u8bc6\u84b8\u998f\u89e3\u51b3\u70e6\u607c\" class=\"read-more\" href=\"https:\/\/ligai.cn\/blog\/sharing\/515.html\" aria-label=\"\u7ee7\u7eed\u9605\u8bfb\u6a21\u578b\u8bad\u7ec3\u8bed\u6599\u5c11\uff1f\u77e5\u8bc6\u84b8\u998f\u89e3\u51b3\u70e6\u607c\">\u9605\u8bfb\u66f4\u591a<\/a><\/p>\n","protected":false},"author":2,"featured_media":0,"comment_status":"closed","ping_status":"open","sticky":false,"template":"","format":"standard","meta":[],"categories":[22],"tags":[24,30,28,29],"_links":{"self":[{"href":"https:\/\/ligai.cn\/blog\/wp-json\/wp\/v2\/posts\/515"}],"collection":[{"href":"https:\/\/ligai.cn\/blog\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/ligai.cn\/blog\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/ligai.cn\/blog\/wp-json\/wp\/v2\/users\/2"}],"replies":[{"embeddable":true,"href":"https:\/\/ligai.cn\/blog\/wp-json\/wp\/v2\/comments?post=515"}],"version-history":[{"count":22,"href":"https:\/\/ligai.cn\/blog\/wp-json\/wp\/v2\/posts\/515\/revisions"}],"predecessor-version":[{"id":544,"href":"https:\/\/ligai.cn\/blog\/wp-json\/wp\/v2\/posts\/515\/revisions\/544"}],"wp:attachment":[{"href":"https:\/\/ligai.cn\/blog\/wp-json\/wp\/v2\/media?parent=515"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/ligai.cn\/blog\/wp-json\/wp\/v2\/categories?post=515"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/ligai.cn\/blog\/wp-json\/wp\/v2\/tags?post=515"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}