Upgrade to Pro — share decks privately, control downloads, hide ads and more …

日本語文法誤り訂正における事前学習済みモデルを用いたデータ増強

 日本語文法誤り訂正における事前学習済みモデルを用いたデータ増強

言語処理学会第27回年次大会(NLP2021)で使用した発表資料になります.

hideyoshikato

March 20, 2021
Tweet

More Decks by hideyoshikato

Other Decks in Research

Transcript

  1. 2 /36 本日の発表内容 1. はじめに ー 文法誤り訂正(GEC)とは ー 近年のGEC研究 ー

    既存のデータ増強 ー 問題設定と目的 2. BERT-DA ー 提案するデータ増強 ー BERT-DA のメリット ー BERT-DA の位置づけ 3. 実験 ー 実験目的 ー 実験設定 ー 要素1：生成元データの実験結果 ー 要素2：疑似データ生成量の実験結果 ー 要素3：置換するトークンの実験結果 ー 既存システムとの性能比較 ー 誤りタイプごとの評価 ー 複数候補文での評価 4. おわりに ー まとめ ー 今後の課題
  2. 5 /36 既存のデータ増強

    $D^a = \{(x^a_j, y^a_j) \mid j = 1, \dots, m\}$：疑似データ（擬似的に生成するデータ）
    $X^a = \{x^a_j \mid j = 1, \dots, m\}$：$Y^a$ から生成した誤り文の集合
    $Y^a = \{y^a_j \mid j = 1, \dots, m\}$：生成元データに含まれる正解文の集合
    $m$：$D^a$ に含まれる文の数
    既存のデータ増強を行う場合：$D$（$X$, $Y$）と $D^a$（$Y^a$, $X^a$）→ 学習 → GECモデル
    $D = \{(x_i, y_i) \mid i = 1, \dots, n\}$：並列データ
    $X = \{x_i \mid i = 1, \dots, n\}$：誤り文の集合　例文：私/は/猫/で/好き/です
    $Y = \{y_i \mid i = 1, \dots, n\}$：正解文の集合　例文：私/は/猫/が/好き/です
    $n$：$D$ に含まれる文の数
    データ増強を行わない場合：$D$（$X$, $Y$）→ 学習 → GECモデル
  3. 6 /36 既存のデータ増強

    $D^a = \{(x^a_j, y^a_j) \mid j = 1, \dots, m\}$：疑似データ（擬似的に生成するデータ）
    $X^a = \{x^a_j \mid j = 1, \dots, m\}$：$Y^a$ から生成した誤り文の集合
    $Y^a = \{y^a_j \mid j = 1, \dots, m\}$：生成元データに含まれる正解文の集合
    $m$：$D^a$ に含まれる文の数
    既存のデータ増強を行う場合：生成元データ（$Y^a$ のみで構成）→ 既存データ増強（Direct Noiseなど）→ $D^a$（$Y^a$, $X^a$）；$D$（$X$, $Y$）と $D^a$ → 学習 → GECモデル
    ▪ Direct Noise：直接ノイズを加える手法 [Zhao 2019]
      以下の4つの操作により誤りを発生させる
      置換：10%の確率でランダムな単語に置換
      削除：10%の確率で削除
      挿入：10%の確率で後ろにランダムな単語を挿入
      入れ替え：正規分布の確率値に基づいて語順を入れ替え
  4. 7 /36 問題設定と目的 既存のデータ増強は $Y^a$ から $X^a$ を生成することで $D^a$ を獲得し学習に利用できるデータを増やす。$Y^a$ が増えることはない。 問題：特定のドメインで使用できるデータ量が限られている状況に対応できない 目的：

    $Y^a$ の生成も考慮に入れたデータ増強(BERT-DA)を提案しGECモデルのさらなる性能向上を目指す （図：$D$（$X$, $Y$）と $D^a$（$Y^a$, $X^a$）→ 学習 → GECモデル）
  5. 8 /36 本日の発表内容 1. はじめに ー 文法誤り訂正とは ー 近年のGEC研究 ー

    既存のデータ増強 ー 問題設定と目的 2. BERT-DA ー 提案するデータ増強 ー BERT-DA のメリット ー BERT-DA の位置づけ 3. 実験 ー 実験目的 ー 実験設定 ー 要素1：生成元データの実験結果 ー 要素2：疑似データ生成量の実験結果 ー 要素3：置換するトークンの実験結果 ー 既存システムとの性能比較 ー 誤りタイプごとの評価 ー 複数候補文での評価 4. おわりに ー まとめ ー 今後の課題
  6. 9 /36 提案するデータ増強(BERT-DA) D <latexit sha1_base64="uJqIsCsXDiNz+fiEsqPI1ft22eM=">AAACkXichVG7SgNBFD2urxhfMTaCjRgUqzArAcUqqIVgEx8xgRjD7Drqkn25OwloyA/4AylsjGAhfoAfYOMPWPgJYhnBxsKbzYJoUO8wM2fO3HNnDldzTcOXjD33KL19/QODkaHo8Mjo2HhsIr7nOxVPF1ndMR0vr3FfmIYtstKQpsi7nuCWZoqcVl5r3+eqwvMNx96VZ64oWvzYNo4MnUuiivsWlyc6N2vr9QNeiiVYkgUx0w3UECQQRsaJ3WMfh3CgowILAjYkYRMcPo0CVDC4xBVRI84jZAT3AnVESVuhLEEZnNgyrcd0KoSsTed2TT9Q6/SKSdMj5Qzm2BO7ZS32yO7YC/v4tVYtqNH+yxntWkcr3NL4xdTO+78qi3aJky/VHwqNsv/2JHGE5cCLQd7cgGm71Dv1q+eN1s7K9lxtnl2zV/LXZM/sgRza1Tf9ZktsXyJKDVJ/tqMb5BaTaiqpqlupRHo17FUE05jFAjVkCWlsIIMsPXyKBq7QVOLKspJWwlylJ9RM4lsom59PHZVZ</latexit> Da GECモデル 学習 Da

    = {(xa j , ya j )|j = 1, ..., m} <latexit sha1_base64="yQ15yKUIijBVu2Sn2rcpwIxF7Sw=">AAACjnichVHLSsNQED3GV62PVt0IbsSiuCpT8YUgFt249NVaqFKSeNXQvEhuC7X0B9yLC0FRcCF+gB/gxh9w4SeIywpuXDhNA6JFnZB7zz0zZ5JzR3NNw5dEz21Ke0dnV3ekJ9rb1z8Qiw8OZX2n5Okiozum4+U01RemYYuMNKQpcq4nVEszxY5WXG3kd8rC8w3H3pYVV+xZ6qFtHBi6KpnK71qqPNJVs5qrFeIJSlIQY60gFYIEwlh34vfYxT4c6CjBgoANydiECp+fPFIguMztocqcx8gI8gI1RFlb4irBFSqzRV4P+ZQPWZvPjZ5+oNb5Kya/HivHMEFPdEt1eqQ7eqGPX3tVgx6Nf6nwrjW1wi3ETka23v9VWbxLHH2p/lBoXP23J4kDLAReDPbmBkzDpd7sXz4+q28tbk5UJ+maXtnfFT3TAzu0y2/6zYbYPEeUB5T6OY5WkJ1OpmaTtDGTSK+Eo4pgFOOY4nnMI401rCMT3OgpLnCpxJU5ZUlZbpYqbaFmGN9CWfsE8NKUZQ==</latexit> X <latexit sha1_base64="aDB5vRM9q14OAbvR6yJFJfpPV9E=">AAACjnichVHLSsNQED3GV62vqhvBTbEorspUfCGIohuXVq0PapEkXjU0L5LbQi39AffiQlAUXIgf4Ae48Qdc9BPEZQU3LpymAVGxTsi9556ZM8m5o7mm4UuiaovS2tbe0Rnpinb39Pb1xwYGt3yn4Okiozum4+1oqi9MwxYZaUhT7LieUC3NFNtafqWe3y4Kzzcce1OWXJGz1CPbODR0VTKV3bNUeayrZnm3sh9LUJKCiP8GqRAkEMaaE3vAHg7gQEcBFgRsSMYmVPj8ZJECwWUuhzJzHiMjyAtUEGVtgasEV6jM5nk94lM2ZG0+13v6gVrnr5j8eqyMY4ye6Y5q9ET39EIff/YqBz3q/1LiXWtohbvffzq88f6vyuJd4vhL1UShcXVzTxKHmAu8GOzNDZi6S73Rv3hyXtuYXx8rj9MNvbK/a6rSIzu0i2/6bVqsXyDKA0r9HMdvsDWZTE0nKT2VWFoORxXBCEYxwfOYxRJWsYZMcKNnuMSVElNmlAVlsVGqtISaIXwLZfUT8vOUZg==</latexit> Y <latexit sha1_base64="Z8wMZFq+ZD1S+zfhhkOASHUkSw0=">AAACs3ichVHLShxBFD12NJrJw0myCbiRDAYX0twWh4SAIHGTpY+MDkzrUN1TajvVD7prhkw68wNZC1m4UnAhfoDg1izyA1n4CSFLA9m48E5Pg6hoblN9T52651YdrhMpL9FEZwPGg8Ghh8MjjwqPnzx9Nlp8/mIlCVuxKytuqMK46ohEKi+QFe1pJatRLIXvKLnqNOd756ttGSdeGHzSnUiu+WIz8DY8V2im6kXT9oXecoVKq911MWunthOqRtLxOaWfu/XtdfF1e9aaMk1zyre79WKJTMpi/DawclBCHgth8Rg2GgjhogUfEgE0YwWBhL8aLBAi5taQMhcz8rJziS4KrG1xleQKwWyT/5u8q+VswPtezyRTu3yL4hWzchwT9IsO6Zx+0hH9pos7e6VZj95bOpydvlZG9dFvr5b//Vflc9bYulLdo3C4+n5PGht4l3nx2FuUMT2Xbr9/+8v38+X3SxPpG9qnP+xvj87olB0G7b/uwaJc2kWBB2TdHMdtsDJtWmWTFmdKcx/yUY1gDK8xyfN4izl8xAIqfO8OTnCKH0bZqBmO0eiXGgO55iWuheFfAn4Oos4=</latexit> Xa = {xa j |j = 1, ..., m} <latexit 
sha1_base64="Xll7OpWHTDR+zRpXO6HMnoSJpz8=">AAACs3ichVFNSxxBEH2OxphNohu9CLlIFoMHGWokixIQxFw8+pFVw44uPbOtjtvzwUzvwmayfyBnwUNOCjmE/ADBqx7yB3LwJ4QcDXjxkNrZgaCiqaGnXr+uV92PciLlJZroos/oH3g0+HjoSeHps+fDI8UXo+tJ2IxdWXFDFcabjkik8gJZ0Z5WcjOKpfAdJTecxrvu+UZLxokXBu91O5JbvtgNvB3PFZqpWtG0faH3XKHSD51tMW+nthOqetL2OaXtTm1/W3zan7emTdOc9u1OrVgik7KYuAusHJSQx3JYPIGNOkK4aMKHRADNWEEg4a8KC4SIuS2kzMWMvOxcooMCa5tcJblCMNvg/y7vqjkb8L7bM8nULt+ieMWsnMAk/aRvdEk/6Dv9out7e6VZj+5b2pydnlZGtZHP42tX/1X5nDX2/qkeUDhc/bAnjR3MZV489hZlTNel2+vf+nh4ufZ2dTJ9Tcf0m/0d0QWdscOg9cf9uiJXv6DAA7Juj+MuWJ8xrbJJK29KC4v5qIbwEq8wxfOYxQKWsIwK33uAU5zh3CgbVcMx6r1Soy/XjOFGGP5fgoOi0A==</latexit> Ya = {ya j |j = 1, ..., m} <latexit sha1_base64="o7s+eg2azMUlvoD/hfS0w0PHhfk=">AAACkHichVHLLgRBFD3T3uM12EhsxIRYTe4IMbHx2ojVDMYjDKluhc70S3fNJEz8gB+wsDESC/EBPsDGD1j4BLEksbFwp6cTQXArVXXq1D236uTqnmUGiugxpjU0NjW3tLbF2zs6u7oTPb2rgVvyDZk3XMv113URSMt0ZF6ZypLrni+FrVtyTS/O1+7XytIPTNdZUUeeLNhi3zH3TEMopgpbtlAHhrAqGyfbYieRpBSFMfgTpCOQRBRZN3GLLezChYESbEg4UIwtCAQ8NpEGwWOugApzPiMzvJc4QZy1Jc6SnCGYLfK6z6fNiHX4XKsZhGqDX7F4+qwcxDA90DW90D3d0BO9/1qrEtao/eWId72uld5O92n/8tu/Kpt3hYNP1R8KnbP/9qSwh0zoxWRvXsjUXBr1+uXjs5flqaXhyghd0jP7q9Ij3bFDp/xqXOXk0jni3KD093b8BKtjqfREinLjyZm5qFWtGMAQRrkfk5jBArLI87uHOMMFqlqvltGmtdl6qhaLNH34EtriB92HlTk=</latexit> Ya <latexit sha1_base64="yRQD4IUJd1locc8tAuu9+fTnR0o=">AAACkHichVHNLkNBFP5c//VXtZHYiIZYNadCNDaKjVjVT2lCydxrcNP7595pExov4AUsbFRiIR7AA9h4AQuPIJYkNhZOb28iCM5kZr755nxn5svRPcsMFNFjk9bc0trW3tEZ6+ru6e2L9yfWA7fsGzJvuJbrF3QRSMt0ZF6ZypIFz5fC1i25oZcW6vcbFekHpuusqSNPFm2x75h7piEUU8UtW6gDQ1jVwsm22IknKUVhDP8E6QgkEUXOjd9iC7twYaAMGxIOFGMLAgGPTaRB8Jgrosqcz8gM7yVOEGNtmbMkZwhmS7zu82kzYh0+12sGodrgVyyePiuHMUoPdE0vdE839ETvv9aqhjXqfzniXW9opbfTdzq4+vavyuZd4eBT9YdC5+y/PSnsIRN6MdmbFzJ1l0ajfuX47GV1ZmW0OkaX9Mz+avRId+zQqbwaV8ty5RwxblD6ezt+gvWJVHoqRcuTyex81KoODGEE49yPaWSxiBzy/O4hznCBmpbQMtqsNtdI1ZoizQC+hLb0AdtklTg=</latexit> Xa ︓疑似データ <latexit 
sha1_base64="FjfJczDPV1qoOyNL4pKCkMEVHBc=">AAAChHichVFNLwNRFD0GRX202EhsGg2xkOYWDbEQYWOprWqTamRmvNak85WZaZNq/AG2xMKKxEL8AD/Axh+w8BPEksTGwu10EqHBfXnvnXfePfe9k6vYuuZ6RE9dUndPb6ivfyA8ODQ8EomOju24Vs1RRU61dMspKLIrdM0UOU/zdFGwHSEbii7ySnWjdZ+vC8fVLHPba9iiZMgVUytrquwxlTb2onFKkB+xTpAMQBxBbFnRO+xiHxZU1GBAwITHWIcMl0cRSRBs5kpoMucw0vx7gSOEWVvjLMEZMrNVXit8KgasyedWTddXq/yKztNhZQzT9Eg39EoPdEvP9PFrraZfo/WXBu9KWyvsvcjxRPb9X5XBu4eDL9UfCoWz//bkoYxl34vG3myfablU2/Xrh+ev2ZXMdHOGruiF/V3SE92zQ7P+pl6nReYCYW5Q8mc7OsHOfCKZSlB6Mb62HrSqH5OYwiz3Ywlr2MQWcvyuwAlOcSaFpDlpQUq1U6WuQDOObyGtfgKtrI/o</latexit> m︓ に含まれる⽂の数 <latexit sha1_base64="o7s+eg2azMUlvoD/hfS0w0PHhfk=">AAACkHichVHLLgRBFD3T3uM12EhsxIRYTe4IMbHx2ojVDMYjDKluhc70S3fNJEz8gB+wsDESC/EBPsDGD1j4BLEksbFwp6cTQXArVXXq1D236uTqnmUGiugxpjU0NjW3tLbF2zs6u7oTPb2rgVvyDZk3XMv113URSMt0ZF6ZypLrni+FrVtyTS/O1+7XytIPTNdZUUeeLNhi3zH3TEMopgpbtlAHhrAqGyfbYieRpBSFMfgTpCOQRBRZN3GLLezChYESbEg4UIwtCAQ8NpEGwWOugApzPiMzvJc4QZy1Jc6SnCGYLfK6z6fNiHX4XKsZhGqDX7F4+qwcxDA90DW90D3d0BO9/1qrEtao/eWId72uld5O92n/8tu/Kpt3hYNP1R8KnbP/9qSwh0zoxWRvXsjUXBr1+uXjs5flqaXhyghd0jP7q9Ij3bFDp/xqXOXk0jni3KD093b8BKtjqfREinLjyZm5qFWtGMAQRrkfk5jBArLI87uHOMMFqlqvltGmtdl6qhaLNH34EtriB92HlTk=</latexit> Ya ︓擬似的に⽣成するデータ <latexit sha1_base64="la8LE71hJcTuThCSIhjC5TT+VGM=">AAAClXichVHdShtBFP7c2qqxNtv2wkJvgiHqVTgplYogSJXiXY0aE4gaZteJLu4fu5OALnkBX8ALvVFQKH0AH8AbX6AXPkLxUsEbLzzZLIgG9Qwz88035zszH8fwbStURJc92pvet+/6+gdSg++HPqT1j59WQq8RmLJkerYXVAwRSttyZUlZypYVP5DCMWxZNrZn2/flpgxCy3OX1Y4v1xyx6Vp1yxSKqZqurzpCbZnCjiqt9UiMtWp6lvIUR6YbFBKQRRILnn6GVWzAg4kGHEi4UIxtCIQ8qiiA4DO3hoi5gJEV30u0kGJtg7MkZwhmt3nd5FM1YV0+t2uGsdrkV2yeASszyNE/+kPXdEF/6T/dPVsrimu0/7LDu9HRSr+W3vuydPuqyuFdYetB9YLC4OyXPSnUMRl7sdibHzNtl2anfnN3/3ppajEXjdIxXbG/I7qkc3boNm/Mk6JcPECKG1R42o5usPItX5jIU/F7duZn0qp+fMUIxrkfPzCDeSygxO82cYgTnGrD2rQ2p/3qpGo9ieYzHoX2+x5KDJam</latexit> Xa0 <latexit 
sha1_base64="mmLwScYFwe4dMzysPNtZe9dr0gE=">AAAClXichVHLSiNBFD1pHc1kRm11McJsZII6q3AjiiIIYUbE3fiKD9QJ1W2pTfpFdyUQm/zA/MAsdKMQQfwAP8CNP+DCTxCXCm5czE2nYVBRb1FVp07dc6sO1/BtK1RE1ymtrf1DR2f6Y+bT567uHr23byX0KoEpi6Zne8GaIUJpW64sKkvZcs0PpHAMW64a5Z/N+9WqDELLc5dVzZdbjth1rR3LFIqpkq5vOkLtmcKO1uu/IzFSL+lZylEcgy9BPgFZJDHv6efYxDY8mKjAgYQLxdiGQMhjA3kQfOa2EDEXMLLie4k6MqytcJbkDMFsmdddPm0krMvnZs0wVpv8is0zYOUghuiKTumOLumMbujx1VpRXKP5lxrvRksr/VLPn4Glh3dVDu8Ke/9VbygMzn7bk8IOJmMvFnvzY6bp0mzVr+7/vVuaWhyKhumYbtnfEV3TBTt0q/dmY0EuHiDDDco/b8dLsDKay4/naGEsW/iRtCqNr/iG79yPCRQwh3kU+d0qDtHAifZFm9ZmtNlWqpZKNP14Etqvf0wylqc=</latexit> Ya0 <latexit sha1_base64="7DbRk2VapVNoyLlI68KCl91oIPA=">AAACnnichVHLLgRBFD3ae7wGG4kFMSFWk2qRECvxSGzEc8wkhkl1K1T0K901k9AZH+AHLKwICxG2PsDGD1j4BLEksbFwp6cTQXArVXXq1D236uQaniUDxdhjjVZbV9/Q2NScaGlta+9IdnatBW7RN0XGdC3Xzxk8EJZ0REZJZYmc5wtuG5bIGnvTlftsSfiBdJ1Vte+JDZvvOHJbmlwRVUj25G2udk1uhTPlzZBvhnnPl7YolwvJFEuzKPp/Aj0GKcSx6CZvkccWXJgowoaAA0XYAkdAYx06GDziNhAS5xOS0b1AGQnSFilLUAYndo/WHTqtx6xD50rNIFKb9IpF0ydlPwbZA7tkL+yeXbEn9v5rrTCqUfnLPu1GVSu8QsdRz8rbvyqbdoXdT9UfCoOy//aksI3xyIskb17EVFya1fqlg+OXlYnlwXCInbFn8nfKHtkdOXRKr+bFklg+QYIapH9vx0+QHUnro2ldXxpNTU7FvWpCLwYwTA0ZwyTmsIgMPXyIc1zjRuvTZrV5baGaqtXEmm58CS33AfYBmr8=</latexit> Da0 <latexit sha1_base64="WosBsmmFw3xSe42rxe/pxWzjOsw=">AAAC73ichVHLahRBFD3TvuL4yEQ3gpvBIRJhaKqDEhECIWYhuMnDSQLpZKjuVJJyqh921wyOlf4BfyCLgKiQRcgH+AFuXLpxkU8QlxHdCOZOT0OMg/E23ffWuefc6sP1YiVTzdhhyTp3/sLFS0OXy1euXrs+XBm5sZhG7cQXDT9SUbLs8VQoGYqGllqJ5TgRPPCUWPJaj3v9pY5IUhmFz3Q3FqsB3wzlhvS5JqhZee4GXG/5XJmZbM3wNePGiQxEllUnq64Zc71IrafdgJJ5mTVN6zSp/me/O9i/t92adOq2bdefulmzUmM2y6M6WDhFUUMRs1HlA1ysI4KPNgIIhNBUK3Ck9KzAAUNM2CoMYQlVMu8LZCiTtk0sQQxOaIu+m3RaKdCQzr2Zaa726RZFb0LKKkbZF7bPjtgndsC+sl//nGXyGb1/6VL2+loRN4df31r4+V9VQFlj60R1hsIj9tmeNDbwMPciyVucIz2Xfn9+59XO0cKj+VFzl71n38jfO3bIPpLDsPPd35sT87so04Kcv9cxWCyO284Dm83dr01NF6sawm3cwRjtYwJTeIJZNOjez/iB3yVYL6wd6431tk+1SoXmJk6FtX8MMh+8gg==</latexit> Da0 = {(xa0 k , ya0 k )|k = 1, ..., K} ︓疑似データ <latexit 
sha1_base64="o7s+eg2azMUlvoD/hfS0w0PHhfk=">AAACkHichVHLLgRBFD3T3uM12EhsxIRYTe4IMbHx2ojVDMYjDKluhc70S3fNJEz8gB+wsDESC/EBPsDGD1j4BLEksbFwp6cTQXArVXXq1D236uTqnmUGiugxpjU0NjW3tLbF2zs6u7oTPb2rgVvyDZk3XMv113URSMt0ZF6ZypLrni+FrVtyTS/O1+7XytIPTNdZUUeeLNhi3zH3TEMopgpbtlAHhrAqGyfbYieRpBSFMfgTpCOQRBRZN3GLLezChYESbEg4UIwtCAQ8NpEGwWOugApzPiMzvJc4QZy1Jc6SnCGYLfK6z6fNiHX4XKsZhGqDX7F4+qwcxDA90DW90D3d0BO9/1qrEtao/eWId72uld5O92n/8tu/Kpt3hYNP1R8KnbP/9qSwh0zoxWRvXsjUXBr1+uXjs5flqaXhyghd0jP7q9Ij3bFDp/xqXOXk0jni3KD093b8BKtjqfREinLjyZm5qFWtGMAQRrkfk5jBArLI87uHOMMFqlqvltGmtdl6qhaLNH34EtriB92HlTk=</latexit> Ya <latexit sha1_base64="mmLwScYFwe4dMzysPNtZe9dr0gE=">AAAClXichVHLSiNBFD1pHc1kRm11McJsZII6q3AjiiIIYUbE3fiKD9QJ1W2pTfpFdyUQm/zA/MAsdKMQQfwAP8CNP+DCTxCXCm5czE2nYVBRb1FVp07dc6sO1/BtK1RE1ymtrf1DR2f6Y+bT567uHr23byX0KoEpi6Zne8GaIUJpW64sKkvZcs0PpHAMW64a5Z/N+9WqDELLc5dVzZdbjth1rR3LFIqpkq5vOkLtmcKO1uu/IzFSL+lZylEcgy9BPgFZJDHv6efYxDY8mKjAgYQLxdiGQMhjA3kQfOa2EDEXMLLie4k6MqytcJbkDMFsmdddPm0krMvnZs0wVpv8is0zYOUghuiKTumOLumMbujx1VpRXKP5lxrvRksr/VLPn4Glh3dVDu8Ke/9VbygMzn7bk8IOJmMvFnvzY6bp0mzVr+7/vVuaWhyKhumYbtnfEV3TBTt0q/dmY0EuHiDDDco/b8dLsDKay4/naGEsW/iRtCqNr/iG79yPCRQwh3kU+d0qDtHAifZFm9ZmtNlWqpZKNP14Etqvf0wylqc=</latexit> Ya0 D <latexit sha1_base64="uJqIsCsXDiNz+fiEsqPI1ft22eM=">AAACkXichVG7SgNBFD2urxhfMTaCjRgUqzArAcUqqIVgEx8xgRjD7Drqkn25OwloyA/4AylsjGAhfoAfYOMPWPgJYhnBxsKbzYJoUO8wM2fO3HNnDldzTcOXjD33KL19/QODkaHo8Mjo2HhsIr7nOxVPF1ndMR0vr3FfmIYtstKQpsi7nuCWZoqcVl5r3+eqwvMNx96VZ64oWvzYNo4MnUuiivsWlyc6N2vr9QNeiiVYkgUx0w3UECQQRsaJ3WMfh3CgowILAjYkYRMcPo0CVDC4xBVRI84jZAT3AnVESVuhLEEZnNgyrcd0KoSsTed2TT9Q6/SKSdMj5Qzm2BO7ZS32yO7YC/v4tVYtqNH+yxntWkcr3NL4xdTO+78qi3aJky/VHwqNsv/2JHGE5cCLQd7cgGm71Dv1q+eN1s7K9lxtnl2zV/LXZM/sgRza1Tf9ZktsXyJKDVJ/tqMb5BaTaiqpqlupRHo17FUE05jFAjVkCWlsIIMsPXyKBq7QVOLKspJWwlylJ9RM4lsom59PHZVZ</latexit> Da GECモデル 学習 <latexit 
sha1_base64="yQ15yKUIijBVu2Sn2rcpwIxF7Sw=">AAACjnichVHLSsNQED3GV62PVt0IbsSiuCpT8YUgFt249NVaqFKSeNXQvEhuC7X0B9yLC0FRcCF+gB/gxh9w4SeIywpuXDhNA6JFnZB7zz0zZ5JzR3NNw5dEz21Ke0dnV3ekJ9rb1z8Qiw8OZX2n5Okiozum4+U01RemYYuMNKQpcq4nVEszxY5WXG3kd8rC8w3H3pYVV+xZ6qFtHBi6KpnK71qqPNJVs5qrFeIJSlIQY60gFYIEwlh34vfYxT4c6CjBgoANydiECp+fPFIguMztocqcx8gI8gI1RFlb4irBFSqzRV4P+ZQPWZvPjZ5+oNb5Kya/HivHMEFPdEt1eqQ7eqGPX3tVgx6Nf6nwrjW1wi3ETka23v9VWbxLHH2p/lBoXP23J4kDLAReDPbmBkzDpd7sXz4+q28tbk5UJ+maXtnfFT3TAzu0y2/6zYbYPEeUB5T6OY5WkJ1OpmaTtDGTSK+Eo4pgFOOY4nnMI401rCMT3OgpLnCpxJU5ZUlZbpYqbaFmGN9CWfsE8NKUZQ==</latexit> X <latexit sha1_base64="aDB5vRM9q14OAbvR6yJFJfpPV9E=">AAACjnichVHLSsNQED3GV62vqhvBTbEorspUfCGIohuXVq0PapEkXjU0L5LbQi39AffiQlAUXIgf4Ae48Qdc9BPEZQU3LpymAVGxTsi9556ZM8m5o7mm4UuiaovS2tbe0Rnpinb39Pb1xwYGt3yn4Okiozum4+1oqi9MwxYZaUhT7LieUC3NFNtafqWe3y4Kzzcce1OWXJGz1CPbODR0VTKV3bNUeayrZnm3sh9LUJKCiP8GqRAkEMaaE3vAHg7gQEcBFgRsSMYmVPj8ZJECwWUuhzJzHiMjyAtUEGVtgasEV6jM5nk94lM2ZG0+13v6gVrnr5j8eqyMY4ye6Y5q9ET39EIff/YqBz3q/1LiXWtohbvffzq88f6vyuJd4vhL1UShcXVzTxKHmAu8GOzNDZi6S73Rv3hyXtuYXx8rj9MNvbK/a6rSIzu0i2/6bVqsXyDKA0r9HMdvsDWZTE0nKT2VWFoORxXBCEYxwfOYxRJWsYZMcKNnuMSVElNmlAVlsVGqtISaIXwLZfUT8vOUZg==</latexit> Y <latexit sha1_base64="o7s+eg2azMUlvoD/hfS0w0PHhfk=">AAACkHichVHLLgRBFD3T3uM12EhsxIRYTe4IMbHx2ojVDMYjDKluhc70S3fNJEz8gB+wsDESC/EBPsDGD1j4BLEksbFwp6cTQXArVXXq1D236uTqnmUGiugxpjU0NjW3tLbF2zs6u7oTPb2rgVvyDZk3XMv113URSMt0ZF6ZypLrni+FrVtyTS/O1+7XytIPTNdZUUeeLNhi3zH3TEMopgpbtlAHhrAqGyfbYieRpBSFMfgTpCOQRBRZN3GLLezChYESbEg4UIwtCAQ8NpEGwWOugApzPiMzvJc4QZy1Jc6SnCGYLfK6z6fNiHX4XKsZhGqDX7F4+qwcxDA90DW90D3d0BO9/1qrEtao/eWId72uld5O92n/8tu/Kpt3hYNP1R8KnbP/9qSwh0zoxWRvXsjUXBr1+uXjs5flqaXhyghd0jP7q9Ij3bFDp/xqXOXk0jni3KD093b8BKtjqfREinLjyZm5qFWtGMAQRrkfk5jBArLI87uHOMMFqlqvltGmtdl6qhaLNH34EtriB92HlTk=</latexit> Ya <latexit 
sha1_base64="yRQD4IUJd1locc8tAuu9+fTnR0o=">AAACkHichVHNLkNBFP5c//VXtZHYiIZYNadCNDaKjVjVT2lCydxrcNP7595pExov4AUsbFRiIR7AA9h4AQuPIJYkNhZOb28iCM5kZr755nxn5svRPcsMFNFjk9bc0trW3tEZ6+ru6e2L9yfWA7fsGzJvuJbrF3QRSMt0ZF6ZypIFz5fC1i25oZcW6vcbFekHpuusqSNPFm2x75h7piEUU8UtW6gDQ1jVwsm22IknKUVhDP8E6QgkEUXOjd9iC7twYaAMGxIOFGMLAgGPTaRB8Jgrosqcz8gM7yVOEGNtmbMkZwhmS7zu82kzYh0+12sGodrgVyyePiuHMUoPdE0vdE839ETvv9aqhjXqfzniXW9opbfTdzq4+vavyuZd4eBT9YdC5+y/PSnsIRN6MdmbFzJ1l0ajfuX47GV1ZmW0OkaX9Mz+avRId+zQqbwaV8ty5RwxblD6ezt+gvWJVHoqRcuTyex81KoODGEE49yPaWSxiBzy/O4hznCBmpbQMtqsNtdI1ZoizQC+hLb0AdtklTg=</latexit> Xa ︓ から⽣成した正解⽂の集合 ︓ から⽣成した誤り⽂の集合 ︓⽣成元データに含まれる正解⽂の集合 ︓ から⽣成した誤り⽂の集合 ︓ に含まれる⽂の数 ︓分析者が決めるパラメータ <latexit sha1_base64="7DbRk2VapVNoyLlI68KCl91oIPA=">AAACnnichVHLLgRBFD3ae7wGG4kFMSFWk2qRECvxSGzEc8wkhkl1K1T0K901k9AZH+AHLKwICxG2PsDGD1j4BLEksbFwp6cTQXArVXXq1D236uQaniUDxdhjjVZbV9/Q2NScaGlta+9IdnatBW7RN0XGdC3Xzxk8EJZ0REZJZYmc5wtuG5bIGnvTlftsSfiBdJ1Vte+JDZvvOHJbmlwRVUj25G2udk1uhTPlzZBvhnnPl7YolwvJFEuzKPp/Aj0GKcSx6CZvkccWXJgowoaAA0XYAkdAYx06GDziNhAS5xOS0b1AGQnSFilLUAYndo/WHTqtx6xD50rNIFKb9IpF0ydlPwbZA7tkL+yeXbEn9v5rrTCqUfnLPu1GVSu8QsdRz8rbvyqbdoXdT9UfCoOy//aksI3xyIskb17EVFya1fqlg+OXlYnlwXCInbFn8nfKHtkdOXRKr+bFklg+QYIapH9vx0+QHUnro2ldXxpNTU7FvWpCLwYwTA0ZwyTmsIgMPXyIc1zjRuvTZrV5baGaqtXEmm58CS33AfYBmr8=</latexit> Da0 <latexit sha1_base64="uJqIsCsXDiNz+fiEsqPI1ft22eM=">AAACkXichVG7SgNBFD2urxhfMTaCjRgUqzArAcUqqIVgEx8xgRjD7Drqkn25OwloyA/4AylsjGAhfoAfYOMPWPgJYhnBxsKbzYJoUO8wM2fO3HNnDldzTcOXjD33KL19/QODkaHo8Mjo2HhsIr7nOxVPF1ndMR0vr3FfmIYtstKQpsi7nuCWZoqcVl5r3+eqwvMNx96VZ64oWvzYNo4MnUuiivsWlyc6N2vr9QNeiiVYkgUx0w3UECQQRsaJ3WMfh3CgowILAjYkYRMcPo0CVDC4xBVRI84jZAT3AnVESVuhLEEZnNgyrcd0KoSsTed2TT9Q6/SKSdMj5Qzm2BO7ZS32yO7YC/v4tVYtqNH+yxntWkcr3NL4xdTO+78qi3aJky/VHwqNsv/2JHGE5cCLQd7cgGm71Dv1q+eN1s7K9lxtnl2zV/LXZM/sgRza1Tf9ZktsXyJKDVJ/tqMb5BaTaiqpqlupRHo17FUE05jFAjVkCWlsIIMsPXyKBq7QVOLKspJWwlylJ9RM4lsom59PHZVZ</latexit> Da BERT-DAを⾏う場合 <latexit 
sha1_base64="REXXpASbaBlA0QBZgYRzWrLu17Q=">AAACjnichVFNLwNRFD3Gd30VG4lNoyFWza34ikQ0bJqwQBVJicyMV17MV2Zem9D4A/ZiISEkFuIH+AE2/oCFnyCWJDYWbqeTCII7mffOO/eeO3PeNTxLBorosU6rb2hsam5pjbW1d3R2xbt7VgO35Jsib7qW668beiAs6Yi8ksoS654vdNuwxJqxN1fNr5WFH0jXWVH7nti09R1HFqWpK6YK89O5DSVtESQWtuJJSlEYiZ8gHYEkolh047fYwDZcmCjBhoADxdiCjoCfAtIgeMxtosKcz0iGeYFDxFhb4irBFTqze7zu8KkQsQ6fqz2DUG3yVyx+fVYmMEgPdE0vdE839ETvv/aqhD2q/7LPu1HTCm+r66gv9/avyuZdYfdT9YfC4Oq/PSkUMRl6kezNC5mqS7PWv3xw8pKbWh6sDNElPbO/C3qkO3bolF/NqyWxfIoYDyj9fRw/wepIKj2WoqXRZGY2GlUL+jGAYZ7HBDLIYhH58EaPcYZzLa6Na9PaTK1Uq4s0vfgSWvYDU2qTpA==</latexit> K = S ⇥ L <latexit sha1_base64="EDeoju0uVhwDQxrW53I3Qu3f21s=">AAACh3ichVG7SgNBFD1Z3/EVtRFsgiFiIfGu+MIqamNhEY2JgorsrpNkcV/sbgIx+AMWtgpWChbiB/gBNv6AhZ8glgo2Ft7dLIiKeoeZOXPmnjtzuKpj6J5P9BiTWlrb2js6u+LdPb19/YmBwaJnV11NFDTbsN0tVfGEoVui4Ou+IbYcVyimaohN9WA5uN+sCdfTbWvDrzti11TKll7SNcUPqPxEcnUvkaIMhZH8CeQIpBBFzk7cYgf7sKGhChMCFnzGBhR4PLYhg+Awt4sGcy4jPbwXOEKctVXOEpyhMHvAa5lP2xFr8Tmo6YVqjV8xeLqsTCJND3RNL3RPN/RE77/WaoQ1gr/UeVebWuHs9R8P59/+VZm8+6h8qv5QqJz9tycfJcyHXnT25oRM4FJr1q8dnr7kF9bTjTG6pGf2d0GPdMcOrdqrdrUm1s8R5wbJ39vxExSnMvJMhtamU9mlqFWdGMEoxrkfc8hiBTkU+N0KTnCKM6lLmpRmpflmqhSLNEP4EtLiByuBkIQ=</latexit> S, L <latexit sha1_base64="BeGWtfNDXIzTpkAD57kmvMsIjBI=">AAACuXichVFNTxRBEH2MX7igLHIx8ULYgB7IpIZgJBoSohePwLqAYXDTMzTsuD0fmendZBnnD/gHTOQECQfCD/Bq4gW4e+AnEI+YePFg7ewkBAlQk556/bpedb+UEykv0UQnfcat23fu3uu/XxoYfPBwqDz8aCkJW7Era26ownjFEYlUXiBr2tNKrkSxFL6j5LLTfNM9X27LOPHC4J3uRHLNF5uBt+G5QjNVL0/bvtANV6j0ffYhFU+zWTu1nVCtJx2fU9rJ6s2c/9SctSZN05ys2lm9XCGT8hi9DKwCVFDEfFj+BhvrCOGiBR8SATRjBYGEv1VYIETMrSFlLmbk5ecSGUqsbXGV5ArBbJP/m7xbLdiA992eSa52+RbFK2blKMbpJ+3TGR3SAZ3S3yt7pXmP7ls6nJ2eVkb1oc+Pq39uVPmcNRrnqmsUDldf70ljAzO5F4+9RTnTden2+re3vpxVXy6OpxO0S7/Y3w6d0A92GLR/u3sLcnEbJR6Q9f84LoOlKdN6btLCdGXudTGqfjzBGJ7xPF5gDm8xjxrf+xXfcYRj45UhjIbxsVdq9BWaEVwII/kHaTqlMg==</latexit> Ya0 = {ya0 k |k = 1, ..., S} <latexit 
sha1_base64="Jx96DEly73QYtOkfetvmfaVHH58=">AAACuXichVFNTxRBEH2MIrh+sMLFxAthg3ggkxqC0UBIiFw4AuvCJgxueoaGHbbnIzO9G5dh/oB/wEROmHgw/gCvJlzAuwd+gvGIiRcO1M5OYpSINemp16/rVfdLOZHyEk10NmDcuDl4a2j4dunO3Xv3R8oPRteTsB27suaGKozrjkik8gJZ055Wsh7FUviOkhtOa6l3vtGRceKFwUvdjeSWL3YDb8dzhWaqUZ61faGbrlBpPXuViqlswU5tJ1TbSdfnlL7OGq2cP2gtWNOmaU5X7axRrpBJeYxfBVYBKihiJSx/ho1thHDRhg+JAJqxgkDC3yYsECLmtpAyFzPy8nOJDCXWtrlKcoVgtsX/Xd5tFmzA+17PJFe7fIviFbNyHJP0jT7SOZ3QJ/pOF//sleY9em/pcnb6Whk1Rt48rP76r8rnrNH8rbpG4XD19Z40dvA89+Kxtyhnei7dfv/O/tvz6tzaZPqY3tMP9ndEZ3TMDoPOT/fDqlw7RIkHZP09jqtgfca0npq0OltZfFGMahiPMIEnPI9nWMQyVlDje9/hC07x1Zg3hNE09vqlxkChGcMfYSSXZLylMA==</latexit> Xa0 = {xa0 k |k = 1, ..., S} 既存のデータ増強を⾏う場合
  7. 10 /36 提案するデータ増強(BERT-DA) D <latexit sha1_base64="uJqIsCsXDiNz+fiEsqPI1ft22eM=">AAACkXichVG7SgNBFD2urxhfMTaCjRgUqzArAcUqqIVgEx8xgRjD7Drqkn25OwloyA/4AylsjGAhfoAfYOMPWPgJYhnBxsKbzYJoUO8wM2fO3HNnDldzTcOXjD33KL19/QODkaHo8Mjo2HhsIr7nOxVPF1ndMR0vr3FfmIYtstKQpsi7nuCWZoqcVl5r3+eqwvMNx96VZ64oWvzYNo4MnUuiivsWlyc6N2vr9QNeiiVYkgUx0w3UECQQRsaJ3WMfh3CgowILAjYkYRMcPo0CVDC4xBVRI84jZAT3AnVESVuhLEEZnNgyrcd0KoSsTed2TT9Q6/SKSdMj5Qzm2BO7ZS32yO7YC/v4tVYtqNH+yxntWkcr3NL4xdTO+78qi3aJky/VHwqNsv/2JHGE5cCLQd7cgGm71Dv1q+eN1s7K9lxtnl2zV/LXZM/sgRza1Tf9ZktsXyJKDVJ/tqMb5BaTaiqpqlupRHo17FUE05jFAjVkCWlsIIMsPXyKBq7QVOLKspJWwlylJ9RM4lsom59PHZVZ</latexit> Da GECモデル 学習 <latexit

    sha1_base64="yQ15yKUIijBVu2Sn2rcpwIxF7Sw=">AAACjnichVHLSsNQED3GV62PVt0IbsSiuCpT8YUgFt249NVaqFKSeNXQvEhuC7X0B9yLC0FRcCF+gB/gxh9w4SeIywpuXDhNA6JFnZB7zz0zZ5JzR3NNw5dEz21Ke0dnV3ekJ9rb1z8Qiw8OZX2n5Okiozum4+U01RemYYuMNKQpcq4nVEszxY5WXG3kd8rC8w3H3pYVV+xZ6qFtHBi6KpnK71qqPNJVs5qrFeIJSlIQY60gFYIEwlh34vfYxT4c6CjBgoANydiECp+fPFIguMztocqcx8gI8gI1RFlb4irBFSqzRV4P+ZQPWZvPjZ5+oNb5Kya/HivHMEFPdEt1eqQ7eqGPX3tVgx6Nf6nwrjW1wi3ETka23v9VWbxLHH2p/lBoXP23J4kDLAReDPbmBkzDpd7sXz4+q28tbk5UJ+maXtnfFT3TAzu0y2/6zYbYPEeUB5T6OY5WkJ1OpmaTtDGTSK+Eo4pgFOOY4nnMI401rCMT3OgpLnCpxJU5ZUlZbpYqbaFmGN9CWfsE8NKUZQ==</latexit> X <latexit sha1_base64="aDB5vRM9q14OAbvR6yJFJfpPV9E=">AAACjnichVHLSsNQED3GV62vqhvBTbEorspUfCGIohuXVq0PapEkXjU0L5LbQi39AffiQlAUXIgf4Ae48Qdc9BPEZQU3LpymAVGxTsi9556ZM8m5o7mm4UuiaovS2tbe0Rnpinb39Pb1xwYGt3yn4Okiozum4+1oqi9MwxYZaUhT7LieUC3NFNtafqWe3y4Kzzcce1OWXJGz1CPbODR0VTKV3bNUeayrZnm3sh9LUJKCiP8GqRAkEMaaE3vAHg7gQEcBFgRsSMYmVPj8ZJECwWUuhzJzHiMjyAtUEGVtgasEV6jM5nk94lM2ZG0+13v6gVrnr5j8eqyMY4ye6Y5q9ET39EIff/YqBz3q/1LiXWtohbvffzq88f6vyuJd4vhL1UShcXVzTxKHmAu8GOzNDZi6S73Rv3hyXtuYXx8rj9MNvbK/a6rSIzu0i2/6bVqsXyDKA0r9HMdvsDWZTE0nKT2VWFoORxXBCEYxwfOYxRJWsYZMcKNnuMSVElNmlAVlsVGqtISaIXwLZfUT8vOUZg==</latexit> Y <latexit sha1_base64="o7s+eg2azMUlvoD/hfS0w0PHhfk=">AAACkHichVHLLgRBFD3T3uM12EhsxIRYTe4IMbHx2ojVDMYjDKluhc70S3fNJEz8gB+wsDESC/EBPsDGD1j4BLEksbFwp6cTQXArVXXq1D236uTqnmUGiugxpjU0NjW3tLbF2zs6u7oTPb2rgVvyDZk3XMv113URSMt0ZF6ZypLrni+FrVtyTS/O1+7XytIPTNdZUUeeLNhi3zH3TEMopgpbtlAHhrAqGyfbYieRpBSFMfgTpCOQRBRZN3GLLezChYESbEg4UIwtCAQ8NpEGwWOugApzPiMzvJc4QZy1Jc6SnCGYLfK6z6fNiHX4XKsZhGqDX7F4+qwcxDA90DW90D3d0BO9/1qrEtao/eWId72uld5O92n/8tu/Kpt3hYNP1R8KnbP/9qSwh0zoxWRvXsjUXBr1+uXjs5flqaXhyghd0jP7q9Ij3bFDp/xqXOXk0jni3KD093b8BKtjqfREinLjyZm5qFWtGMAQRrkfk5jBArLI87uHOMMFqlqvltGmtdl6qhaLNH34EtriB92HlTk=</latexit> Ya <latexit 
sha1_base64="yRQD4IUJd1locc8tAuu9+fTnR0o=">AAACkHichVHNLkNBFP5c//VXtZHYiIZYNadCNDaKjVjVT2lCydxrcNP7595pExov4AUsbFRiIR7AA9h4AQuPIJYkNhZOb28iCM5kZr755nxn5svRPcsMFNFjk9bc0trW3tEZ6+ru6e2L9yfWA7fsGzJvuJbrF3QRSMt0ZF6ZypIFz5fC1i25oZcW6vcbFekHpuusqSNPFm2x75h7piEUU8UtW6gDQ1jVwsm22IknKUVhDP8E6QgkEUXOjd9iC7twYaAMGxIOFGMLAgGPTaRB8Jgrosqcz8gM7yVOEGNtmbMkZwhmS7zu82kzYh0+12sGodrgVyyePiuHMUoPdE0vdE839ETvv9aqhjXqfzniXW9opbfTdzq4+vavyuZd4eBT9YdC5+y/PSnsIRN6MdmbFzJ1l0ajfuX47GV1ZmW0OkaX9Mz+avRId+zQqbwaV8ty5RwxblD6ezt+gvWJVHoqRcuTyex81KoODGEE49yPaWSxiBzy/O4hznCBmpbQMtqsNtdI1ZoizQC+hLb0AdtklTg=</latexit> Xa <latexit sha1_base64="la8LE71hJcTuThCSIhjC5TT+VGM=">AAAClXichVHdShtBFP7c2qqxNtv2wkJvgiHqVTgplYogSJXiXY0aE4gaZteJLu4fu5OALnkBX8ALvVFQKH0AH8AbX6AXPkLxUsEbLzzZLIgG9Qwz88035zszH8fwbStURJc92pvet+/6+gdSg++HPqT1j59WQq8RmLJkerYXVAwRSttyZUlZypYVP5DCMWxZNrZn2/flpgxCy3OX1Y4v1xyx6Vp1yxSKqZqurzpCbZnCjiqt9UiMtWp6lvIUR6YbFBKQRRILnn6GVWzAg4kGHEi4UIxtCIQ8qiiA4DO3hoi5gJEV30u0kGJtg7MkZwhmt3nd5FM1YV0+t2uGsdrkV2yeASszyNE/+kPXdEF/6T/dPVsrimu0/7LDu9HRSr+W3vuydPuqyuFdYetB9YLC4OyXPSnUMRl7sdibHzNtl2anfnN3/3ppajEXjdIxXbG/I7qkc3boNm/Mk6JcPECKG1R42o5usPItX5jIU/F7duZn0qp+fMUIxrkfPzCDeSygxO82cYgTnGrD2rQ2p/3qpGo9ieYzHoX2+x5KDJam</latexit> Xa0 <latexit sha1_base64="mmLwScYFwe4dMzysPNtZe9dr0gE=">AAAClXichVHLSiNBFD1pHc1kRm11McJsZII6q3AjiiIIYUbE3fiKD9QJ1W2pTfpFdyUQm/zA/MAsdKMQQfwAP8CNP+DCTxCXCm5czE2nYVBRb1FVp07dc6sO1/BtK1RE1ymtrf1DR2f6Y+bT567uHr23byX0KoEpi6Zne8GaIUJpW64sKkvZcs0PpHAMW64a5Z/N+9WqDELLc5dVzZdbjth1rR3LFIqpkq5vOkLtmcKO1uu/IzFSL+lZylEcgy9BPgFZJDHv6efYxDY8mKjAgYQLxdiGQMhjA3kQfOa2EDEXMLLie4k6MqytcJbkDMFsmdddPm0krMvnZs0wVpv8is0zYOUghuiKTumOLumMbujx1VpRXKP5lxrvRksr/VLPn4Glh3dVDu8Ke/9VbygMzn7bk8IOJmMvFnvzY6bp0mzVr+7/vVuaWhyKhumYbtnfEV3TBTt0q/dmY0EuHiDDDco/b8dLsDKay4/naGEsW/iRtCqNr/iG79yPCRQwh3kU+d0qDtHAifZFm9ZmtNlWqpZKNP14Etqvf0wylqc=</latexit> Ya0 <latexit 
sha1_base64="7DbRk2VapVNoyLlI68KCl91oIPA=">AAACnnichVHLLgRBFD3ae7wGG4kFMSFWk2qRECvxSGzEc8wkhkl1K1T0K901k9AZH+AHLKwICxG2PsDGD1j4BLEksbFwp6cTQXArVXXq1D236uQaniUDxdhjjVZbV9/Q2NScaGlta+9IdnatBW7RN0XGdC3Xzxk8EJZ0REZJZYmc5wtuG5bIGnvTlftsSfiBdJ1Vte+JDZvvOHJbmlwRVUj25G2udk1uhTPlzZBvhnnPl7YolwvJFEuzKPp/Aj0GKcSx6CZvkccWXJgowoaAA0XYAkdAYx06GDziNhAS5xOS0b1AGQnSFilLUAYndo/WHTqtx6xD50rNIFKb9IpF0ydlPwbZA7tkL+yeXbEn9v5rrTCqUfnLPu1GVSu8QsdRz8rbvyqbdoXdT9UfCoOy//aksI3xyIskb17EVFya1fqlg+OXlYnlwXCInbFn8nfKHtkdOXRKr+bFklg+QYIapH9vx0+QHUnro2ldXxpNTU7FvWpCLwYwTA0ZwyTmsIgMPXyIc1zjRuvTZrV5baGaqtXEmm58CS33AfYBmr8=</latexit> Da0 <latexit sha1_base64="WosBsmmFw3xSe42rxe/pxWzjOsw=">AAAC73ichVHLahRBFD3TvuL4yEQ3gpvBIRJhaKqDEhECIWYhuMnDSQLpZKjuVJJyqh921wyOlf4BfyCLgKiQRcgH+AFuXLpxkU8QlxHdCOZOT0OMg/E23ffWuefc6sP1YiVTzdhhyTp3/sLFS0OXy1euXrs+XBm5sZhG7cQXDT9SUbLs8VQoGYqGllqJ5TgRPPCUWPJaj3v9pY5IUhmFz3Q3FqsB3wzlhvS5JqhZee4GXG/5XJmZbM3wNePGiQxEllUnq64Zc71IrafdgJJ5mTVN6zSp/me/O9i/t92adOq2bdefulmzUmM2y6M6WDhFUUMRs1HlA1ysI4KPNgIIhNBUK3Ck9KzAAUNM2CoMYQlVMu8LZCiTtk0sQQxOaIu+m3RaKdCQzr2Zaa726RZFb0LKKkbZF7bPjtgndsC+sl//nGXyGb1/6VL2+loRN4df31r4+V9VQFlj60R1hsIj9tmeNDbwMPciyVucIz2Xfn9+59XO0cKj+VFzl71n38jfO3bIPpLDsPPd35sT87so04Kcv9cxWCyO284Dm83dr01NF6sawm3cwRjtYwJTeIJZNOjez/iB3yVYL6wd6431tk+1SoXmJk6FtX8MMh+8gg==</latexit> Da0 = {(xa0 k , ya0 k )|k = 1, ..., K}︓疑似データ <latexit sha1_base64="o7s+eg2azMUlvoD/hfS0w0PHhfk=">AAACkHichVHLLgRBFD3T3uM12EhsxIRYTe4IMbHx2ojVDMYjDKluhc70S3fNJEz8gB+wsDESC/EBPsDGD1j4BLEksbFwp6cTQXArVXXq1D236uTqnmUGiugxpjU0NjW3tLbF2zs6u7oTPb2rgVvyDZk3XMv113URSMt0ZF6ZypLrni+FrVtyTS/O1+7XytIPTNdZUUeeLNhi3zH3TEMopgpbtlAHhrAqGyfbYieRpBSFMfgTpCOQRBRZN3GLLezChYESbEg4UIwtCAQ8NpEGwWOugApzPiMzvJc4QZy1Jc6SnCGYLfK6z6fNiHX4XKsZhGqDX7F4+qwcxDA90DW90D3d0BO9/1qrEtao/eWId72uld5O92n/8tu/Kpt3hYNP1R8KnbP/9qSwh0zoxWRvXsjUXBr1+uXjs5flqaXhyghd0jP7q9Ij3bFDp/xqXOXk0jni3KD093b8BKtjqfREinLjyZm5qFWtGMAQRrkfk5jBArLI87uHOMMFqlqvltGmtdl6qhaLNH34EtriB92HlTk=</latexit> Ya <latexit 
sha1_base64="mmLwScYFwe4dMzysPNtZe9dr0gE=">AAAClXichVHLSiNBFD1pHc1kRm11McJsZII6q3AjiiIIYUbE3fiKD9QJ1W2pTfpFdyUQm/zA/MAsdKMQQfwAP8CNP+DCTxCXCm5czE2nYVBRb1FVp07dc6sO1/BtK1RE1ymtrf1DR2f6Y+bT567uHr23byX0KoEpi6Zne8GaIUJpW64sKkvZcs0PpHAMW64a5Z/N+9WqDELLc5dVzZdbjth1rR3LFIqpkq5vOkLtmcKO1uu/IzFSL+lZylEcgy9BPgFZJDHv6efYxDY8mKjAgYQLxdiGQMhjA3kQfOa2EDEXMLLie4k6MqytcJbkDMFsmdddPm0krMvnZs0wVpv8is0zYOUghuiKTumOLumMbujx1VpRXKP5lxrvRksr/VLPn4Glh3dVDu8Ke/9VbygMzn7bk8IOJmMvFnvzY6bp0mzVr+7/vVuaWhyKhumYbtnfEV3TBTt0q/dmY0EuHiDDDco/b8dLsDKay4/naGEsW/iRtCqNr/iG79yPCRQwh3kU+d0qDtHAifZFm9ZmtNlWqpZKNP14Etqvf0wylqc=</latexit> Ya0 ︓ から⽣成した正解⽂の集合 ︓ から⽣成した誤り⽂の集合 の⽣成は2つのステップで⾏う 1ステップ⽬ の⽣成 2ステップ⽬ の⽣成 ︓ に含まれる⽂の数 ︓分析者が決めるパラメータ <latexit sha1_base64="7DbRk2VapVNoyLlI68KCl91oIPA=">AAACnnichVHLLgRBFD3ae7wGG4kFMSFWk2qRECvxSGzEc8wkhkl1K1T0K901k9AZH+AHLKwICxG2PsDGD1j4BLEksbFwp6cTQXArVXXq1D236uQaniUDxdhjjVZbV9/Q2NScaGlta+9IdnatBW7RN0XGdC3Xzxk8EJZ0REZJZYmc5wtuG5bIGnvTlftsSfiBdJ1Vte+JDZvvOHJbmlwRVUj25G2udk1uhTPlzZBvhnnPl7YolwvJFEuzKPp/Aj0GKcSx6CZvkccWXJgowoaAA0XYAkdAYx06GDziNhAS5xOS0b1AGQnSFilLUAYndo/WHTqtx6xD50rNIFKb9IpF0ydlPwbZA7tkL+yeXbEn9v5rrTCqUfnLPu1GVSu8QsdRz8rbvyqbdoXdT9UfCoOy//aksI3xyIskb17EVFya1fqlg+OXlYnlwXCInbFn8nfKHtkdOXRKr+bFklg+QYIapH9vx0+QHUnro2ldXxpNTU7FvWpCLwYwTA0ZwyTmsIgMPXyIc1zjRuvTZrV5baGaqtXEmm58CS33AfYBmr8=</latexit> Da0 <latexit sha1_base64="REXXpASbaBlA0QBZgYRzWrLu17Q=">AAACjnichVFNLwNRFD3Gd30VG4lNoyFWza34ikQ0bJqwQBVJicyMV17MV2Zem9D4A/ZiISEkFuIH+AE2/oCFnyCWJDYWbqeTCII7mffOO/eeO3PeNTxLBorosU6rb2hsam5pjbW1d3R2xbt7VgO35Jsib7qW668beiAs6Yi8ksoS654vdNuwxJqxN1fNr5WFH0jXWVH7nti09R1HFqWpK6YK89O5DSVtESQWtuJJSlEYiZ8gHYEkolh047fYwDZcmCjBhoADxdiCjoCfAtIgeMxtosKcz0iGeYFDxFhb4irBFTqze7zu8KkQsQ6fqz2DUG3yVyx+fVYmMEgPdE0vdE839ETvv/aqhD2q/7LPu1HTCm+r66gv9/avyuZdYfdT9YfC4Oq/PSkUMRl6kezNC5mqS7PWv3xw8pKbWh6sDNElPbO/C3qkO3bolF/NqyWxfIoYDyj9fRw/wepIKj2WoqXRZGY2GlUL+jGAYZ7HBDLIYhH58EaPcYZzLa6Na9PaTK1Uq4s0vfgSWvYDU2qTpA==</latexit> K = S ⇥ L <latexit 
sha1_base64="EDeoju0uVhwDQxrW53I3Qu3f21s=">AAACh3ichVG7SgNBFD1Z3/EVtRFsgiFiIfGu+MIqamNhEY2JgorsrpNkcV/sbgIx+AMWtgpWChbiB/gBNv6AhZ8glgo2Ft7dLIiKeoeZOXPmnjtzuKpj6J5P9BiTWlrb2js6u+LdPb19/YmBwaJnV11NFDTbsN0tVfGEoVui4Ou+IbYcVyimaohN9WA5uN+sCdfTbWvDrzti11TKll7SNcUPqPxEcnUvkaIMhZH8CeQIpBBFzk7cYgf7sKGhChMCFnzGBhR4PLYhg+Awt4sGcy4jPbwXOEKctVXOEpyhMHvAa5lP2xFr8Tmo6YVqjV8xeLqsTCJND3RNL3RPN/RE77/WaoQ1gr/UeVebWuHs9R8P59/+VZm8+6h8qv5QqJz9tycfJcyHXnT25oRM4FJr1q8dnr7kF9bTjTG6pGf2d0GPdMcOrdqrdrUm1s8R5wbJ39vxExSnMvJMhtamU9mlqFWdGMEoxrkfc8hiBTkU+N0KTnCKM6lLmpRmpflmqhSLNEP4EtLiByuBkIQ=</latexit> S, L <latexit sha1_base64="BeGWtfNDXIzTpkAD57kmvMsIjBI=">AAACuXichVFNTxRBEH2MX7igLHIx8ULYgB7IpIZgJBoSohePwLqAYXDTMzTsuD0fmendZBnnD/gHTOQECQfCD/Bq4gW4e+AnEI+YePFg7ewkBAlQk556/bpedb+UEykv0UQnfcat23fu3uu/XxoYfPBwqDz8aCkJW7Era26ownjFEYlUXiBr2tNKrkSxFL6j5LLTfNM9X27LOPHC4J3uRHLNF5uBt+G5QjNVL0/bvtANV6j0ffYhFU+zWTu1nVCtJx2fU9rJ6s2c/9SctSZN05ys2lm9XCGT8hi9DKwCVFDEfFj+BhvrCOGiBR8SATRjBYGEv1VYIETMrSFlLmbk5ecSGUqsbXGV5ArBbJP/m7xbLdiA992eSa52+RbFK2blKMbpJ+3TGR3SAZ3S3yt7pXmP7ls6nJ2eVkb1oc+Pq39uVPmcNRrnqmsUDldf70ljAzO5F4+9RTnTden2+re3vpxVXy6OpxO0S7/Y3w6d0A92GLR/u3sLcnEbJR6Q9f84LoOlKdN6btLCdGXudTGqfjzBGJ7xPF5gDm8xjxrf+xXfcYRj45UhjIbxsVdq9BWaEVwII/kHaTqlMg==</latexit> Ya0 = {ya0 k |k = 1, ..., S} <latexit sha1_base64="Jx96DEly73QYtOkfetvmfaVHH58=">AAACuXichVFNTxRBEH2MIrh+sMLFxAthg3ggkxqC0UBIiFw4AuvCJgxueoaGHbbnIzO9G5dh/oB/wEROmHgw/gCvJlzAuwd+gvGIiRcO1M5OYpSINemp16/rVfdLOZHyEk10NmDcuDl4a2j4dunO3Xv3R8oPRteTsB27suaGKozrjkik8gJZ055Wsh7FUviOkhtOa6l3vtGRceKFwUvdjeSWL3YDb8dzhWaqUZ61faGbrlBpPXuViqlswU5tJ1TbSdfnlL7OGq2cP2gtWNOmaU5X7axRrpBJeYxfBVYBKihiJSx/ho1thHDRhg+JAJqxgkDC3yYsECLmtpAyFzPy8nOJDCXWtrlKcoVgtsX/Xd5tFmzA+17PJFe7fIviFbNyHJP0jT7SOZ3QJ/pOF//sleY9em/pcnb6Whk1Rt48rP76r8rnrNH8rbpG4XD19Z40dvA89+Kxtyhnei7dfv/O/tvz6tzaZPqY3tMP9ndEZ3TMDoPOT/fDqlw7RIkHZP09jqtgfca0npq0OltZfFGMahiPMIEnPI9nWMQyVlDje9/hC07x1Zg3hNE09vqlxkChGcMfYSSXZLylMA==</latexit> Xa0 = {xa0 k |k = 1, ..., S} <latexit 
sha1_base64="7DbRk2VapVNoyLlI68KCl91oIPA=">AAACnnichVHLLgRBFD3ae7wGG4kFMSFWk2qRECvxSGzEc8wkhkl1K1T0K901k9AZH+AHLKwICxG2PsDGD1j4BLEksbFwp6cTQXArVXXq1D236uQaniUDxdhjjVZbV9/Q2NScaGlta+9IdnatBW7RN0XGdC3Xzxk8EJZ0REZJZYmc5wtuG5bIGnvTlftsSfiBdJ1Vte+JDZvvOHJbmlwRVUj25G2udk1uhTPlzZBvhnnPl7YolwvJFEuzKPp/Aj0GKcSx6CZvkccWXJgowoaAA0XYAkdAYx06GDziNhAS5xOS0b1AGQnSFilLUAYndo/WHTqtx6xD50rNIFKb9IpF0ydlPwbZA7tkL+yeXbEn9v5rrTCqUfnLPu1GVSu8QsdRz8rbvyqbdoXdT9UfCoOy//aksI3xyIskb17EVFya1fqlg+OXlYnlwXCInbFn8nfKHtkdOXRKr+bFklg+QYIapH9vx0+QHUnro2ldXxpNTU7FvWpCLwYwTA0ZwyTmsIgMPXyIc1zjRuvTZrV5baGaqtXEmm58CS33AfYBmr8=</latexit> Da0 BERT-DAを⾏う場合 <latexit sha1_base64="mmLwScYFwe4dMzysPNtZe9dr0gE=">AAAClXichVHLSiNBFD1pHc1kRm11McJsZII6q3AjiiIIYUbE3fiKD9QJ1W2pTfpFdyUQm/zA/MAsdKMQQfwAP8CNP+DCTxCXCm5czE2nYVBRb1FVp07dc6sO1/BtK1RE1ymtrf1DR2f6Y+bT567uHr23byX0KoEpi6Zne8GaIUJpW64sKkvZcs0PpHAMW64a5Z/N+9WqDELLc5dVzZdbjth1rR3LFIqpkq5vOkLtmcKO1uu/IzFSL+lZylEcgy9BPgFZJDHv6efYxDY8mKjAgYQLxdiGQMhjA3kQfOa2EDEXMLLie4k6MqytcJbkDMFsmdddPm0krMvnZs0wVpv8is0zYOUghuiKTumOLumMbujx1VpRXKP5lxrvRksr/VLPn4Glh3dVDu8Ke/9VbygMzn7bk8IOJmMvFnvzY6bp0mzVr+7/vVuaWhyKhumYbtnfEV3TBTt0q/dmY0EuHiDDDco/b8dLsDKay4/naGEsW/iRtCqNr/iG79yPCRQwh3kU+d0qDtHAifZFm9ZmtNlWqpZKNP14Etqvf0wylqc=</latexit> Ya0 <latexit sha1_base64="la8LE71hJcTuThCSIhjC5TT+VGM=">AAAClXichVHdShtBFP7c2qqxNtv2wkJvgiHqVTgplYogSJXiXY0aE4gaZteJLu4fu5OALnkBX8ALvVFQKH0AH8AbX6AXPkLxUsEbLzzZLIgG9Qwz88035zszH8fwbStURJc92pvet+/6+gdSg++HPqT1j59WQq8RmLJkerYXVAwRSttyZUlZypYVP5DCMWxZNrZn2/flpgxCy3OX1Y4v1xyx6Vp1yxSKqZqurzpCbZnCjiqt9UiMtWp6lvIUR6YbFBKQRRILnn6GVWzAg4kGHEi4UIxtCIQ8qiiA4DO3hoi5gJEV30u0kGJtg7MkZwhmt3nd5FM1YV0+t2uGsdrkV2yeASszyNE/+kPXdEF/6T/dPVsrimu0/7LDu9HRSr+W3vuydPuqyuFdYetB9YLC4OyXPSnUMRl7sdibHzNtl2anfnN3/3ppajEXjdIxXbG/I7qkc3boNm/Mk6JcPECKG1R42o5usPItX5jIU/F7duZn0qp+fMUIxrkfPzCDeSygxO82cYgTnGrD2rQ2p/3qpGo9ieYzHoX2+x5KDJam</latexit> Xa0
  8. 11 /36 提案するデータ増強(BERT-DA) D <latexit sha1_base64="uJqIsCsXDiNz+fiEsqPI1ft22eM=">AAACkXichVG7SgNBFD2urxhfMTaCjRgUqzArAcUqqIVgEx8xgRjD7Drqkn25OwloyA/4AylsjGAhfoAfYOMPWPgJYhnBxsKbzYJoUO8wM2fO3HNnDldzTcOXjD33KL19/QODkaHo8Mjo2HhsIr7nOxVPF1ndMR0vr3FfmIYtstKQpsi7nuCWZoqcVl5r3+eqwvMNx96VZ64oWvzYNo4MnUuiivsWlyc6N2vr9QNeiiVYkgUx0w3UECQQRsaJ3WMfh3CgowILAjYkYRMcPo0CVDC4xBVRI84jZAT3AnVESVuhLEEZnNgyrcd0KoSsTed2TT9Q6/SKSdMj5Qzm2BO7ZS32yO7YC/v4tVYtqNH+yxntWkcr3NL4xdTO+78qi3aJky/VHwqNsv/2JHGE5cCLQd7cgGm71Dv1q+eN1s7K9lxtnl2zV/LXZM/sgRza1Tf9ZktsXyJKDVJ/tqMb5BaTaiqpqlupRHo17FUE05jFAjVkCWlsIIMsPXyKBq7QVOLKspJWwlylJ9RM4lsom59PHZVZ</latexit> Da GECモデル 学習 <latexit

    sha1_base64="yQ15yKUIijBVu2Sn2rcpwIxF7Sw=">AAACjnichVHLSsNQED3GV62PVt0IbsSiuCpT8YUgFt249NVaqFKSeNXQvEhuC7X0B9yLC0FRcCF+gB/gxh9w4SeIywpuXDhNA6JFnZB7zz0zZ5JzR3NNw5dEz21Ke0dnV3ekJ9rb1z8Qiw8OZX2n5Okiozum4+U01RemYYuMNKQpcq4nVEszxY5WXG3kd8rC8w3H3pYVV+xZ6qFtHBi6KpnK71qqPNJVs5qrFeIJSlIQY60gFYIEwlh34vfYxT4c6CjBgoANydiECp+fPFIguMztocqcx8gI8gI1RFlb4irBFSqzRV4P+ZQPWZvPjZ5+oNb5Kya/HivHMEFPdEt1eqQ7eqGPX3tVgx6Nf6nwrjW1wi3ETka23v9VWbxLHH2p/lBoXP23J4kDLAReDPbmBkzDpd7sXz4+q28tbk5UJ+maXtnfFT3TAzu0y2/6zYbYPEeUB5T6OY5WkJ1OpmaTtDGTSK+Eo4pgFOOY4nnMI401rCMT3OgpLnCpxJU5ZUlZbpYqbaFmGN9CWfsE8NKUZQ==</latexit> X <latexit sha1_base64="aDB5vRM9q14OAbvR6yJFJfpPV9E=">AAACjnichVHLSsNQED3GV62vqhvBTbEorspUfCGIohuXVq0PapEkXjU0L5LbQi39AffiQlAUXIgf4Ae48Qdc9BPEZQU3LpymAVGxTsi9556ZM8m5o7mm4UuiaovS2tbe0Rnpinb39Pb1xwYGt3yn4Okiozum4+1oqi9MwxYZaUhT7LieUC3NFNtafqWe3y4Kzzcce1OWXJGz1CPbODR0VTKV3bNUeayrZnm3sh9LUJKCiP8GqRAkEMaaE3vAHg7gQEcBFgRsSMYmVPj8ZJECwWUuhzJzHiMjyAtUEGVtgasEV6jM5nk94lM2ZG0+13v6gVrnr5j8eqyMY4ye6Y5q9ET39EIff/YqBz3q/1LiXWtohbvffzq88f6vyuJd4vhL1UShcXVzTxKHmAu8GOzNDZi6S73Rv3hyXtuYXx8rj9MNvbK/a6rSIzu0i2/6bVqsXyDKA0r9HMdvsDWZTE0nKT2VWFoORxXBCEYxwfOYxRJWsYZMcKNnuMSVElNmlAVlsVGqtISaIXwLZfUT8vOUZg==</latexit> Y <latexit sha1_base64="o7s+eg2azMUlvoD/hfS0w0PHhfk=">AAACkHichVHLLgRBFD3T3uM12EhsxIRYTe4IMbHx2ojVDMYjDKluhc70S3fNJEz8gB+wsDESC/EBPsDGD1j4BLEksbFwp6cTQXArVXXq1D236uTqnmUGiugxpjU0NjW3tLbF2zs6u7oTPb2rgVvyDZk3XMv113URSMt0ZF6ZypLrni+FrVtyTS/O1+7XytIPTNdZUUeeLNhi3zH3TEMopgpbtlAHhrAqGyfbYieRpBSFMfgTpCOQRBRZN3GLLezChYESbEg4UIwtCAQ8NpEGwWOugApzPiMzvJc4QZy1Jc6SnCGYLfK6z6fNiHX4XKsZhGqDX7F4+qwcxDA90DW90D3d0BO9/1qrEtao/eWId72uld5O92n/8tu/Kpt3hYNP1R8KnbP/9qSwh0zoxWRvXsjUXBr1+uXjs5flqaXhyghd0jP7q9Ij3bFDp/xqXOXk0jni3KD093b8BKtjqfREinLjyZm5qFWtGMAQRrkfk5jBArLI87uHOMMFqlqvltGmtdl6qhaLNH34EtriB92HlTk=</latexit> Ya <latexit 
sha1_base64="yRQD4IUJd1locc8tAuu9+fTnR0o=">AAACkHichVHNLkNBFP5c//VXtZHYiIZYNadCNDaKjVjVT2lCydxrcNP7595pExov4AUsbFRiIR7AA9h4AQuPIJYkNhZOb28iCM5kZr755nxn5svRPcsMFNFjk9bc0trW3tEZ6+ru6e2L9yfWA7fsGzJvuJbrF3QRSMt0ZF6ZypIFz5fC1i25oZcW6vcbFekHpuusqSNPFm2x75h7piEUU8UtW6gDQ1jVwsm22IknKUVhDP8E6QgkEUXOjd9iC7twYaAMGxIOFGMLAgGPTaRB8Jgrosqcz8gM7yVOEGNtmbMkZwhmS7zu82kzYh0+12sGodrgVyyePiuHMUoPdE0vdE839ETvv9aqhjXqfzniXW9opbfTdzq4+vavyuZd4eBT9YdC5+y/PSnsIRN6MdmbFzJ1l0ajfuX47GV1ZmW0OkaX9Mz+avRId+zQqbwaV8ty5RwxblD6ezt+gvWJVHoqRcuTyex81KoODGEE49yPaWSxiBzy/O4hznCBmpbQMtqsNtdI1ZoizQC+hLb0AdtklTg=</latexit> Xa <latexit sha1_base64="la8LE71hJcTuThCSIhjC5TT+VGM=">AAAClXichVHdShtBFP7c2qqxNtv2wkJvgiHqVTgplYogSJXiXY0aE4gaZteJLu4fu5OALnkBX8ALvVFQKH0AH8AbX6AXPkLxUsEbLzzZLIgG9Qwz88035zszH8fwbStURJc92pvet+/6+gdSg++HPqT1j59WQq8RmLJkerYXVAwRSttyZUlZypYVP5DCMWxZNrZn2/flpgxCy3OX1Y4v1xyx6Vp1yxSKqZqurzpCbZnCjiqt9UiMtWp6lvIUR6YbFBKQRRILnn6GVWzAg4kGHEi4UIxtCIQ8qiiA4DO3hoi5gJEV30u0kGJtg7MkZwhmt3nd5FM1YV0+t2uGsdrkV2yeASszyNE/+kPXdEF/6T/dPVsrimu0/7LDu9HRSr+W3vuydPuqyuFdYetB9YLC4OyXPSnUMRl7sdibHzNtl2anfnN3/3ppajEXjdIxXbG/I7qkc3boNm/Mk6JcPECKG1R42o5usPItX5jIU/F7duZn0qp+fMUIxrkfPzCDeSygxO82cYgTnGrD2rQ2p/3qpGo9ieYzHoX2+x5KDJam</latexit> Xa0 <latexit sha1_base64="mmLwScYFwe4dMzysPNtZe9dr0gE=">AAAClXichVHLSiNBFD1pHc1kRm11McJsZII6q3AjiiIIYUbE3fiKD9QJ1W2pTfpFdyUQm/zA/MAsdKMQQfwAP8CNP+DCTxCXCm5czE2nYVBRb1FVp07dc6sO1/BtK1RE1ymtrf1DR2f6Y+bT567uHr23byX0KoEpi6Zne8GaIUJpW64sKkvZcs0PpHAMW64a5Z/N+9WqDELLc5dVzZdbjth1rR3LFIqpkq5vOkLtmcKO1uu/IzFSL+lZylEcgy9BPgFZJDHv6efYxDY8mKjAgYQLxdiGQMhjA3kQfOa2EDEXMLLie4k6MqytcJbkDMFsmdddPm0krMvnZs0wVpv8is0zYOUghuiKTumOLumMbujx1VpRXKP5lxrvRksr/VLPn4Glh3dVDu8Ke/9VbygMzn7bk8IOJmMvFnvzY6bp0mzVr+7/vVuaWhyKhumYbtnfEV3TBTt0q/dmY0EuHiDDDco/b8dLsDKay4/naGEsW/iRtCqNr/iG79yPCRQwh3kU+d0qDtHAifZFm9ZmtNlWqpZKNP14Etqvf0wylqc=</latexit> Ya0 <latexit 
sha1_base64="7DbRk2VapVNoyLlI68KCl91oIPA=">AAACnnichVHLLgRBFD3ae7wGG4kFMSFWk2qRECvxSGzEc8wkhkl1K1T0K901k9AZH+AHLKwICxG2PsDGD1j4BLEksbFwp6cTQXArVXXq1D236uQaniUDxdhjjVZbV9/Q2NScaGlta+9IdnatBW7RN0XGdC3Xzxk8EJZ0REZJZYmc5wtuG5bIGnvTlftsSfiBdJ1Vte+JDZvvOHJbmlwRVUj25G2udk1uhTPlzZBvhnnPl7YolwvJFEuzKPp/Aj0GKcSx6CZvkccWXJgowoaAA0XYAkdAYx06GDziNhAS5xOS0b1AGQnSFilLUAYndo/WHTqtx6xD50rNIFKb9IpF0ydlPwbZA7tkL+yeXbEn9v5rrTCqUfnLPu1GVSu8QsdRz8rbvyqbdoXdT9UfCoOy//aksI3xyIskb17EVFya1fqlg+OXlYnlwXCInbFn8nfKHtkdOXRKr+bFklg+QYIapH9vx0+QHUnro2ldXxpNTU7FvWpCLwYwTA0ZwyTmsIgMPXyIc1zjRuvTZrV5baGaqtXEmm58CS33AfYBmr8=</latexit> Da0 <latexit sha1_base64="WosBsmmFw3xSe42rxe/pxWzjOsw=">AAAC73ichVHLahRBFD3TvuL4yEQ3gpvBIRJhaKqDEhECIWYhuMnDSQLpZKjuVJJyqh921wyOlf4BfyCLgKiQRcgH+AFuXLpxkU8QlxHdCOZOT0OMg/E23ffWuefc6sP1YiVTzdhhyTp3/sLFS0OXy1euXrs+XBm5sZhG7cQXDT9SUbLs8VQoGYqGllqJ5TgRPPCUWPJaj3v9pY5IUhmFz3Q3FqsB3wzlhvS5JqhZee4GXG/5XJmZbM3wNePGiQxEllUnq64Zc71IrafdgJJ5mTVN6zSp/me/O9i/t92adOq2bdefulmzUmM2y6M6WDhFUUMRs1HlA1ysI4KPNgIIhNBUK3Ck9KzAAUNM2CoMYQlVMu8LZCiTtk0sQQxOaIu+m3RaKdCQzr2Zaa726RZFb0LKKkbZF7bPjtgndsC+sl//nGXyGb1/6VL2+loRN4df31r4+V9VQFlj60R1hsIj9tmeNDbwMPciyVucIz2Xfn9+59XO0cKj+VFzl71n38jfO3bIPpLDsPPd35sT87so04Kcv9cxWCyO284Dm83dr01NF6sawm3cwRjtYwJTeIJZNOjez/iB3yVYL6wd6431tk+1SoXmJk6FtX8MMh+8gg==</latexit> Da0 = {(xa0 k , ya0 k )|k = 1, ..., K}︓疑似データ <latexit sha1_base64="o7s+eg2azMUlvoD/hfS0w0PHhfk=">AAACkHichVHLLgRBFD3T3uM12EhsxIRYTe4IMbHx2ojVDMYjDKluhc70S3fNJEz8gB+wsDESC/EBPsDGD1j4BLEksbFwp6cTQXArVXXq1D236uTqnmUGiugxpjU0NjW3tLbF2zs6u7oTPb2rgVvyDZk3XMv113URSMt0ZF6ZypLrni+FrVtyTS/O1+7XytIPTNdZUUeeLNhi3zH3TEMopgpbtlAHhrAqGyfbYieRpBSFMfgTpCOQRBRZN3GLLezChYESbEg4UIwtCAQ8NpEGwWOugApzPiMzvJc4QZy1Jc6SnCGYLfK6z6fNiHX4XKsZhGqDX7F4+qwcxDA90DW90D3d0BO9/1qrEtao/eWId72uld5O92n/8tu/Kpt3hYNP1R8KnbP/9qSwh0zoxWRvXsjUXBr1+uXjs5flqaXhyghd0jP7q9Ij3bFDp/xqXOXk0jni3KD093b8BKtjqfREinLjyZm5qFWtGMAQRrkfk5jBArLI87uHOMMFqlqvltGmtdl6qhaLNH34EtriB92HlTk=</latexit> Ya <latexit 
sha1_base64="mmLwScYFwe4dMzysPNtZe9dr0gE=">AAAClXichVHLSiNBFD1pHc1kRm11McJsZII6q3AjiiIIYUbE3fiKD9QJ1W2pTfpFdyUQm/zA/MAsdKMQQfwAP8CNP+DCTxCXCm5czE2nYVBRb1FVp07dc6sO1/BtK1RE1ymtrf1DR2f6Y+bT567uHr23byX0KoEpi6Zne8GaIUJpW64sKkvZcs0PpHAMW64a5Z/N+9WqDELLc5dVzZdbjth1rR3LFIqpkq5vOkLtmcKO1uu/IzFSL+lZylEcgy9BPgFZJDHv6efYxDY8mKjAgYQLxdiGQMhjA3kQfOa2EDEXMLLie4k6MqytcJbkDMFsmdddPm0krMvnZs0wVpv8is0zYOUghuiKTumOLumMbujx1VpRXKP5lxrvRksr/VLPn4Glh3dVDu8Ke/9VbygMzn7bk8IOJmMvFnvzY6bp0mzVr+7/vVuaWhyKhumYbtnfEV3TBTt0q/dmY0EuHiDDDco/b8dLsDKay4/naGEsW/iRtCqNr/iG79yPCRQwh3kU+d0qDtHAifZFm9ZmtNlWqpZKNP14Etqvf0wylqc=</latexit> Ya0 ︓ から⽣成した正解⽂の集合 ︓ から⽣成した誤り⽂の集合 の⽣成は2つのステップで⾏う 1ステップ⽬ の⽣成 2ステップ⽬ の⽣成 ︓ に含まれる⽂の数 ︓分析者が決めるパラメータ <latexit sha1_base64="7DbRk2VapVNoyLlI68KCl91oIPA=">AAACnnichVHLLgRBFD3ae7wGG4kFMSFWk2qRECvxSGzEc8wkhkl1K1T0K901k9AZH+AHLKwICxG2PsDGD1j4BLEksbFwp6cTQXArVXXq1D236uQaniUDxdhjjVZbV9/Q2NScaGlta+9IdnatBW7RN0XGdC3Xzxk8EJZ0REZJZYmc5wtuG5bIGnvTlftsSfiBdJ1Vte+JDZvvOHJbmlwRVUj25G2udk1uhTPlzZBvhnnPl7YolwvJFEuzKPp/Aj0GKcSx6CZvkccWXJgowoaAA0XYAkdAYx06GDziNhAS5xOS0b1AGQnSFilLUAYndo/WHTqtx6xD50rNIFKb9IpF0ydlPwbZA7tkL+yeXbEn9v5rrTCqUfnLPu1GVSu8QsdRz8rbvyqbdoXdT9UfCoOy//aksI3xyIskb17EVFya1fqlg+OXlYnlwXCInbFn8nfKHtkdOXRKr+bFklg+QYIapH9vx0+QHUnro2ldXxpNTU7FvWpCLwYwTA0ZwyTmsIgMPXyIc1zjRuvTZrV5baGaqtXEmm58CS33AfYBmr8=</latexit> Da0 <latexit sha1_base64="REXXpASbaBlA0QBZgYRzWrLu17Q=">AAACjnichVFNLwNRFD3Gd30VG4lNoyFWza34ikQ0bJqwQBVJicyMV17MV2Zem9D4A/ZiISEkFuIH+AE2/oCFnyCWJDYWbqeTCII7mffOO/eeO3PeNTxLBorosU6rb2hsam5pjbW1d3R2xbt7VgO35Jsib7qW668beiAs6Yi8ksoS654vdNuwxJqxN1fNr5WFH0jXWVH7nti09R1HFqWpK6YK89O5DSVtESQWtuJJSlEYiZ8gHYEkolh047fYwDZcmCjBhoADxdiCjoCfAtIgeMxtosKcz0iGeYFDxFhb4irBFTqze7zu8KkQsQ6fqz2DUG3yVyx+fVYmMEgPdE0vdE839ETvv/aqhD2q/7LPu1HTCm+r66gv9/avyuZdYfdT9YfC4Oq/PSkUMRl6kezNC5mqS7PWv3xw8pKbWh6sDNElPbO/C3qkO3bolF/NqyWxfIoYDyj9fRw/wepIKj2WoqXRZGY2GlUL+jGAYZ7HBDLIYhH58EaPcYZzLa6Na9PaTK1Uq4s0vfgSWvYDU2qTpA==</latexit> K = S ⇥ L <latexit 
sha1_base64="EDeoju0uVhwDQxrW53I3Qu3f21s=">AAACh3ichVG7SgNBFD1Z3/EVtRFsgiFiIfGu+MIqamNhEY2JgorsrpNkcV/sbgIx+AMWtgpWChbiB/gBNv6AhZ8glgo2Ft7dLIiKeoeZOXPmnjtzuKpj6J5P9BiTWlrb2js6u+LdPb19/YmBwaJnV11NFDTbsN0tVfGEoVui4Ou+IbYcVyimaohN9WA5uN+sCdfTbWvDrzti11TKll7SNcUPqPxEcnUvkaIMhZH8CeQIpBBFzk7cYgf7sKGhChMCFnzGBhR4PLYhg+Awt4sGcy4jPbwXOEKctVXOEpyhMHvAa5lP2xFr8Tmo6YVqjV8xeLqsTCJND3RNL3RPN/RE77/WaoQ1gr/UeVebWuHs9R8P59/+VZm8+6h8qv5QqJz9tycfJcyHXnT25oRM4FJr1q8dnr7kF9bTjTG6pGf2d0GPdMcOrdqrdrUm1s8R5wbJ39vxExSnMvJMhtamU9mlqFWdGMEoxrkfc8hiBTkU+N0KTnCKM6lLmpRmpflmqhSLNEP4EtLiByuBkIQ=</latexit> S, L <latexit sha1_base64="BeGWtfNDXIzTpkAD57kmvMsIjBI=">AAACuXichVFNTxRBEH2MX7igLHIx8ULYgB7IpIZgJBoSohePwLqAYXDTMzTsuD0fmendZBnnD/gHTOQECQfCD/Bq4gW4e+AnEI+YePFg7ewkBAlQk556/bpedb+UEykv0UQnfcat23fu3uu/XxoYfPBwqDz8aCkJW7Era26ownjFEYlUXiBr2tNKrkSxFL6j5LLTfNM9X27LOPHC4J3uRHLNF5uBt+G5QjNVL0/bvtANV6j0ffYhFU+zWTu1nVCtJx2fU9rJ6s2c/9SctSZN05ys2lm9XCGT8hi9DKwCVFDEfFj+BhvrCOGiBR8SATRjBYGEv1VYIETMrSFlLmbk5ecSGUqsbXGV5ArBbJP/m7xbLdiA992eSa52+RbFK2blKMbpJ+3TGR3SAZ3S3yt7pXmP7ls6nJ2eVkb1oc+Pq39uVPmcNRrnqmsUDldf70ljAzO5F4+9RTnTden2+re3vpxVXy6OpxO0S7/Y3w6d0A92GLR/u3sLcnEbJR6Q9f84LoOlKdN6btLCdGXudTGqfjzBGJ7xPF5gDm8xjxrf+xXfcYRj45UhjIbxsVdq9BWaEVwII/kHaTqlMg==</latexit> Ya0 = {ya0 k |k = 1, ..., S} <latexit sha1_base64="Jx96DEly73QYtOkfetvmfaVHH58=">AAACuXichVFNTxRBEH2MIrh+sMLFxAthg3ggkxqC0UBIiFw4AuvCJgxueoaGHbbnIzO9G5dh/oB/wEROmHgw/gCvJlzAuwd+gvGIiRcO1M5OYpSINemp16/rVfdLOZHyEk10NmDcuDl4a2j4dunO3Xv3R8oPRteTsB27suaGKozrjkik8gJZ055Wsh7FUviOkhtOa6l3vtGRceKFwUvdjeSWL3YDb8dzhWaqUZ61faGbrlBpPXuViqlswU5tJ1TbSdfnlL7OGq2cP2gtWNOmaU5X7axRrpBJeYxfBVYBKihiJSx/ho1thHDRhg+JAJqxgkDC3yYsECLmtpAyFzPy8nOJDCXWtrlKcoVgtsX/Xd5tFmzA+17PJFe7fIviFbNyHJP0jT7SOZ3QJ/pOF//sleY9em/pcnb6Whk1Rt48rP76r8rnrNH8rbpG4XD19Z40dvA89+Kxtyhnei7dfv/O/tvz6tzaZPqY3tMP9ndEZ3TMDoPOT/fDqlw7RIkHZP09jqtgfca0npq0OltZfFGMahiPMIEnPI9nWMQyVlDje9/hC07x1Zg3hNE09vqlxkChGcMfYSSXZLylMA==</latexit> Xa0 = {xa0 k |k = 1, ..., S} <latexit 
sha1_base64="7DbRk2VapVNoyLlI68KCl91oIPA=">AAACnnichVHLLgRBFD3ae7wGG4kFMSFWk2qRECvxSGzEc8wkhkl1K1T0K901k9AZH+AHLKwICxG2PsDGD1j4BLEksbFwp6cTQXArVXXq1D236uQaniUDxdhjjVZbV9/Q2NScaGlta+9IdnatBW7RN0XGdC3Xzxk8EJZ0REZJZYmc5wtuG5bIGnvTlftsSfiBdJ1Vte+JDZvvOHJbmlwRVUj25G2udk1uhTPlzZBvhnnPl7YolwvJFEuzKPp/Aj0GKcSx6CZvkccWXJgowoaAA0XYAkdAYx06GDziNhAS5xOS0b1AGQnSFilLUAYndo/WHTqtx6xD50rNIFKb9IpF0ydlPwbZA7tkL+yeXbEn9v5rrTCqUfnLPu1GVSu8QsdRz8rbvyqbdoXdT9UfCoOy//aksI3xyIskb17EVFya1fqlg+OXlYnlwXCInbFn8nfKHtkdOXRKr+bFklg+QYIapH9vx0+QHUnro2ldXxpNTU7FvWpCLwYwTA0ZwyTmsIgMPXyIc1zjRuvTZrV5baGaqtXEmm58CS33AfYBmr8=</latexit> Da0 BERT-DAを⾏う場合 <latexit sha1_base64="mmLwScYFwe4dMzysPNtZe9dr0gE=">AAAClXichVHLSiNBFD1pHc1kRm11McJsZII6q3AjiiIIYUbE3fiKD9QJ1W2pTfpFdyUQm/zA/MAsdKMQQfwAP8CNP+DCTxCXCm5czE2nYVBRb1FVp07dc6sO1/BtK1RE1ymtrf1DR2f6Y+bT567uHr23byX0KoEpi6Zne8GaIUJpW64sKkvZcs0PpHAMW64a5Z/N+9WqDELLc5dVzZdbjth1rR3LFIqpkq5vOkLtmcKO1uu/IzFSL+lZylEcgy9BPgFZJDHv6efYxDY8mKjAgYQLxdiGQMhjA3kQfOa2EDEXMLLie4k6MqytcJbkDMFsmdddPm0krMvnZs0wVpv8is0zYOUghuiKTumOLumMbujx1VpRXKP5lxrvRksr/VLPn4Glh3dVDu8Ke/9VbygMzn7bk8IOJmMvFnvzY6bp0mzVr+7/vVuaWhyKhumYbtnfEV3TBTt0q/dmY0EuHiDDDco/b8dLsDKay4/naGEsW/iRtCqNr/iG79yPCRQwh3kU+d0qDtHAifZFm9ZmtNlWqpZKNP14Etqvf0wylqc=</latexit> Ya0 <latexit sha1_base64="la8LE71hJcTuThCSIhjC5TT+VGM=">AAAClXichVHdShtBFP7c2qqxNtv2wkJvgiHqVTgplYogSJXiXY0aE4gaZteJLu4fu5OALnkBX8ALvVFQKH0AH8AbX6AXPkLxUsEbLzzZLIgG9Qwz88035zszH8fwbStURJc92pvet+/6+gdSg++HPqT1j59WQq8RmLJkerYXVAwRSttyZUlZypYVP5DCMWxZNrZn2/flpgxCy3OX1Y4v1xyx6Vp1yxSKqZqurzpCbZnCjiqt9UiMtWp6lvIUR6YbFBKQRRILnn6GVWzAg4kGHEi4UIxtCIQ8qiiA4DO3hoi5gJEV30u0kGJtg7MkZwhmt3nd5FM1YV0+t2uGsdrkV2yeASszyNE/+kPXdEF/6T/dPVsrimu0/7LDu9HRSr+W3vuydPuqyuFdYetB9YLC4OyXPSnUMRl7sdibHzNtl2anfnN3/3ppajEXjdIxXbG/I7qkc3boNm/Mk6JcPECKG1R42o5usPItX5jIU/F7duZn0qp+fMUIxrkfPzCDeSygxO82cYgTnGrD2rQ2p/3qpGo9ieYzHoX2+x5KDJam</latexit> Xa0 私 [MASK] 猫 が 好き です BERT 私 、 猫 が 好き です 私 も 猫 が 好き です ︙ 私 だって 猫 が 好き です 私 ⿊ 猫 が 好き です ya j <latexit 
sha1_base64="5oPvZ9o77Z7plwDeFkKwFpkvyog=">AAAC3HicSyrIySwuMTC4ycjEzMLKxs7BycXNw8vHLyAoFFacX1qUnBqanJ+TXxSRlFicmpOZlxpaklmSkxpRUJSamJuUkxqelO0Mkg8vSy0qzszPCympLEiNzU1Mz8tMy0xOLAEKxQuExyTl56QUV+YCqerK2vjq7Nq46kR1DUPNWh0cUkYgqZT8kmIFXCp8NGvjBZQN9AzAQAGTYQhlKDNAQUC+wHaGGIYUhnyGZIZShlyGVIY8hhIgO4chkaEYCKMZDBkMGAqAYrEM1UCxIiArEyyfylDLwAXUWwpUlQpUkQgUzQaS6UBeNFQ0D8gHmVkM1p0MtCUHiIuAOhUYVA2uGqw0+GxwwmC1wUuDPzjNqgabAXJLJZBOguhNLYjn75II/k5QVy6QLmHIQOjCoyMJqBq/n0oY0hgswH7JBPqtACwC8mUyxPyyqumfg62CVKvVDBYZvAb6b6HBTYPDQB/mlX1JXhqYGjSbgQsYQYbo0YHJCDPSMzTVMwg0UXZwgkYVB4M0gxKDBjA+zBkcGDwYAhhCgfbuZXjM8IbhLVMcUwtTN1MvRCkTI1SPMAMKYJoGAH2btFA=</latexit> ya0(1) k , ya0(2) k , . . . , ya0(L) k の⽣成 BERTのMasked LMにより置換 <latexit sha1_base64="mmLwScYFwe4dMzysPNtZe9dr0gE=">AAAClXichVHLSiNBFD1pHc1kRm11McJsZII6q3AjiiIIYUbE3fiKD9QJ1W2pTfpFdyUQm/zA/MAsdKMQQfwAP8CNP+DCTxCXCm5czE2nYVBRb1FVp07dc6sO1/BtK1RE1ymtrf1DR2f6Y+bT567uHr23byX0KoEpi6Zne8GaIUJpW64sKkvZcs0PpHAMW64a5Z/N+9WqDELLc5dVzZdbjth1rR3LFIqpkq5vOkLtmcKO1uu/IzFSL+lZylEcgy9BPgFZJDHv6efYxDY8mKjAgYQLxdiGQMhjA3kQfOa2EDEXMLLie4k6MqytcJbkDMFsmdddPm0krMvnZs0wVpv8is0zYOUghuiKTumOLumMbujx1VpRXKP5lxrvRksr/VLPn4Glh3dVDu8Ke/9VbygMzn7bk8IOJmMvFnvzY6bp0mzVr+7/vVuaWhyKhumYbtnfEV3TBTt0q/dmY0EuHiDDDco/b8dLsDKay4/naGEsW/iRtCqNr/iG79yPCRQwh3kU+d0qDtHAifZFm9ZmtNlWqpZKNP14Etqvf0wylqc=</latexit> Ya0
  9. 12 /36 提案するデータ増強(BERT-DA) D <latexit sha1_base64="uJqIsCsXDiNz+fiEsqPI1ft22eM=">AAACkXichVG7SgNBFD2urxhfMTaCjRgUqzArAcUqqIVgEx8xgRjD7Drqkn25OwloyA/4AylsjGAhfoAfYOMPWPgJYhnBxsKbzYJoUO8wM2fO3HNnDldzTcOXjD33KL19/QODkaHo8Mjo2HhsIr7nOxVPF1ndMR0vr3FfmIYtstKQpsi7nuCWZoqcVl5r3+eqwvMNx96VZ64oWvzYNo4MnUuiivsWlyc6N2vr9QNeiiVYkgUx0w3UECQQRsaJ3WMfh3CgowILAjYkYRMcPo0CVDC4xBVRI84jZAT3AnVESVuhLEEZnNgyrcd0KoSsTed2TT9Q6/SKSdMj5Qzm2BO7ZS32yO7YC/v4tVYtqNH+yxntWkcr3NL4xdTO+78qi3aJky/VHwqNsv/2JHGE5cCLQd7cgGm71Dv1q+eN1s7K9lxtnl2zV/LXZM/sgRza1Tf9ZktsXyJKDVJ/tqMb5BaTaiqpqlupRHo17FUE05jFAjVkCWlsIIMsPXyKBq7QVOLKspJWwlylJ9RM4lsom59PHZVZ</latexit> Da GECモデル 学習 <latexit

    sha1_base64="yQ15yKUIijBVu2Sn2rcpwIxF7Sw=">AAACjnichVHLSsNQED3GV62PVt0IbsSiuCpT8YUgFt249NVaqFKSeNXQvEhuC7X0B9yLC0FRcCF+gB/gxh9w4SeIywpuXDhNA6JFnZB7zz0zZ5JzR3NNw5dEz21Ke0dnV3ekJ9rb1z8Qiw8OZX2n5Okiozum4+U01RemYYuMNKQpcq4nVEszxY5WXG3kd8rC8w3H3pYVV+xZ6qFtHBi6KpnK71qqPNJVs5qrFeIJSlIQY60gFYIEwlh34vfYxT4c6CjBgoANydiECp+fPFIguMztocqcx8gI8gI1RFlb4irBFSqzRV4P+ZQPWZvPjZ5+oNb5Kya/HivHMEFPdEt1eqQ7eqGPX3tVgx6Nf6nwrjW1wi3ETka23v9VWbxLHH2p/lBoXP23J4kDLAReDPbmBkzDpd7sXz4+q28tbk5UJ+maXtnfFT3TAzu0y2/6zYbYPEeUB5T6OY5WkJ1OpmaTtDGTSK+Eo4pgFOOY4nnMI401rCMT3OgpLnCpxJU5ZUlZbpYqbaFmGN9CWfsE8NKUZQ==</latexit> X <latexit sha1_base64="aDB5vRM9q14OAbvR6yJFJfpPV9E=">AAACjnichVHLSsNQED3GV62vqhvBTbEorspUfCGIohuXVq0PapEkXjU0L5LbQi39AffiQlAUXIgf4Ae48Qdc9BPEZQU3LpymAVGxTsi9556ZM8m5o7mm4UuiaovS2tbe0Rnpinb39Pb1xwYGt3yn4Okiozum4+1oqi9MwxYZaUhT7LieUC3NFNtafqWe3y4Kzzcce1OWXJGz1CPbODR0VTKV3bNUeayrZnm3sh9LUJKCiP8GqRAkEMaaE3vAHg7gQEcBFgRsSMYmVPj8ZJECwWUuhzJzHiMjyAtUEGVtgasEV6jM5nk94lM2ZG0+13v6gVrnr5j8eqyMY4ye6Y5q9ET39EIff/YqBz3q/1LiXWtohbvffzq88f6vyuJd4vhL1UShcXVzTxKHmAu8GOzNDZi6S73Rv3hyXtuYXx8rj9MNvbK/a6rSIzu0i2/6bVqsXyDKA0r9HMdvsDWZTE0nKT2VWFoORxXBCEYxwfOYxRJWsYZMcKNnuMSVElNmlAVlsVGqtISaIXwLZfUT8vOUZg==</latexit> Y <latexit sha1_base64="o7s+eg2azMUlvoD/hfS0w0PHhfk=">AAACkHichVHLLgRBFD3T3uM12EhsxIRYTe4IMbHx2ojVDMYjDKluhc70S3fNJEz8gB+wsDESC/EBPsDGD1j4BLEksbFwp6cTQXArVXXq1D236uTqnmUGiugxpjU0NjW3tLbF2zs6u7oTPb2rgVvyDZk3XMv113URSMt0ZF6ZypLrni+FrVtyTS/O1+7XytIPTNdZUUeeLNhi3zH3TEMopgpbtlAHhrAqGyfbYieRpBSFMfgTpCOQRBRZN3GLLezChYESbEg4UIwtCAQ8NpEGwWOugApzPiMzvJc4QZy1Jc6SnCGYLfK6z6fNiHX4XKsZhGqDX7F4+qwcxDA90DW90D3d0BO9/1qrEtao/eWId72uld5O92n/8tu/Kpt3hYNP1R8KnbP/9qSwh0zoxWRvXsjUXBr1+uXjs5flqaXhyghd0jP7q9Ij3bFDp/xqXOXk0jni3KD093b8BKtjqfREinLjyZm5qFWtGMAQRrkfk5jBArLI87uHOMMFqlqvltGmtdl6qhaLNH34EtriB92HlTk=</latexit> Ya <latexit 
sha1_base64="yRQD4IUJd1locc8tAuu9+fTnR0o=">AAACkHichVHNLkNBFP5c//VXtZHYiIZYNadCNDaKjVjVT2lCydxrcNP7595pExov4AUsbFRiIR7AA9h4AQuPIJYkNhZOb28iCM5kZr755nxn5svRPcsMFNFjk9bc0trW3tEZ6+ru6e2L9yfWA7fsGzJvuJbrF3QRSMt0ZF6ZypIFz5fC1i25oZcW6vcbFekHpuusqSNPFm2x75h7piEUU8UtW6gDQ1jVwsm22IknKUVhDP8E6QgkEUXOjd9iC7twYaAMGxIOFGMLAgGPTaRB8Jgrosqcz8gM7yVOEGNtmbMkZwhmS7zu82kzYh0+12sGodrgVyyePiuHMUoPdE0vdE839ETvv9aqhjXqfzniXW9opbfTdzq4+vavyuZd4eBT9YdC5+y/PSnsIRN6MdmbFzJ1l0ajfuX47GV1ZmW0OkaX9Mz+avRId+zQqbwaV8ty5RwxblD6ezt+gvWJVHoqRcuTyex81KoODGEE49yPaWSxiBzy/O4hznCBmpbQMtqsNtdI1ZoizQC+hLb0AdtklTg=</latexit> Xa <latexit sha1_base64="la8LE71hJcTuThCSIhjC5TT+VGM=">AAAClXichVHdShtBFP7c2qqxNtv2wkJvgiHqVTgplYogSJXiXY0aE4gaZteJLu4fu5OALnkBX8ALvVFQKH0AH8AbX6AXPkLxUsEbLzzZLIgG9Qwz88035zszH8fwbStURJc92pvet+/6+gdSg++HPqT1j59WQq8RmLJkerYXVAwRSttyZUlZypYVP5DCMWxZNrZn2/flpgxCy3OX1Y4v1xyx6Vp1yxSKqZqurzpCbZnCjiqt9UiMtWp6lvIUR6YbFBKQRRILnn6GVWzAg4kGHEi4UIxtCIQ8qiiA4DO3hoi5gJEV30u0kGJtg7MkZwhmt3nd5FM1YV0+t2uGsdrkV2yeASszyNE/+kPXdEF/6T/dPVsrimu0/7LDu9HRSr+W3vuydPuqyuFdYetB9YLC4OyXPSnUMRl7sdibHzNtl2anfnN3/3ppajEXjdIxXbG/I7qkc3boNm/Mk6JcPECKG1R42o5usPItX5jIU/F7duZn0qp+fMUIxrkfPzCDeSygxO82cYgTnGrD2rQ2p/3qpGo9ieYzHoX2+x5KDJam</latexit> Xa0 <latexit sha1_base64="mmLwScYFwe4dMzysPNtZe9dr0gE=">AAAClXichVHLSiNBFD1pHc1kRm11McJsZII6q3AjiiIIYUbE3fiKD9QJ1W2pTfpFdyUQm/zA/MAsdKMQQfwAP8CNP+DCTxCXCm5czE2nYVBRb1FVp07dc6sO1/BtK1RE1ymtrf1DR2f6Y+bT567uHr23byX0KoEpi6Zne8GaIUJpW64sKkvZcs0PpHAMW64a5Z/N+9WqDELLc5dVzZdbjth1rR3LFIqpkq5vOkLtmcKO1uu/IzFSL+lZylEcgy9BPgFZJDHv6efYxDY8mKjAgYQLxdiGQMhjA3kQfOa2EDEXMLLie4k6MqytcJbkDMFsmdddPm0krMvnZs0wVpv8is0zYOUghuiKTumOLumMbujx1VpRXKP5lxrvRksr/VLPn4Glh3dVDu8Ke/9VbygMzn7bk8IOJmMvFnvzY6bp0mzVr+7/vVuaWhyKhumYbtnfEV3TBTt0q/dmY0EuHiDDDco/b8dLsDKay4/naGEsW/iRtCqNr/iG79yPCRQwh3kU+d0qDtHAifZFm9ZmtNlWqpZKNP14Etqvf0wylqc=</latexit> Ya0 <latexit 
sha1_base64="7DbRk2VapVNoyLlI68KCl91oIPA=">AAACnnichVHLLgRBFD3ae7wGG4kFMSFWk2qRECvxSGzEc8wkhkl1K1T0K901k9AZH+AHLKwICxG2PsDGD1j4BLEksbFwp6cTQXArVXXq1D236uQaniUDxdhjjVZbV9/Q2NScaGlta+9IdnatBW7RN0XGdC3Xzxk8EJZ0REZJZYmc5wtuG5bIGnvTlftsSfiBdJ1Vte+JDZvvOHJbmlwRVUj25G2udk1uhTPlzZBvhnnPl7YolwvJFEuzKPp/Aj0GKcSx6CZvkccWXJgowoaAA0XYAkdAYx06GDziNhAS5xOS0b1AGQnSFilLUAYndo/WHTqtx6xD50rNIFKb9IpF0ydlPwbZA7tkL+yeXbEn9v5rrTCqUfnLPu1GVSu8QsdRz8rbvyqbdoXdT9UfCoOy//aksI3xyIskb17EVFya1fqlg+OXlYnlwXCInbFn8nfKHtkdOXRKr+bFklg+QYIapH9vx0+QHUnro2ldXxpNTU7FvWpCLwYwTA0ZwyTmsIgMPXyIc1zjRuvTZrV5baGaqtXEmm58CS33AfYBmr8=</latexit> Da0 <latexit sha1_base64="WosBsmmFw3xSe42rxe/pxWzjOsw=">AAAC73ichVHLahRBFD3TvuL4yEQ3gpvBIRJhaKqDEhECIWYhuMnDSQLpZKjuVJJyqh921wyOlf4BfyCLgKiQRcgH+AFuXLpxkU8QlxHdCOZOT0OMg/E23ffWuefc6sP1YiVTzdhhyTp3/sLFS0OXy1euXrs+XBm5sZhG7cQXDT9SUbLs8VQoGYqGllqJ5TgRPPCUWPJaj3v9pY5IUhmFz3Q3FqsB3wzlhvS5JqhZee4GXG/5XJmZbM3wNePGiQxEllUnq64Zc71IrafdgJJ5mTVN6zSp/me/O9i/t92adOq2bdefulmzUmM2y6M6WDhFUUMRs1HlA1ysI4KPNgIIhNBUK3Ck9KzAAUNM2CoMYQlVMu8LZCiTtk0sQQxOaIu+m3RaKdCQzr2Zaa726RZFb0LKKkbZF7bPjtgndsC+sl//nGXyGb1/6VL2+loRN4df31r4+V9VQFlj60R1hsIj9tmeNDbwMPciyVucIz2Xfn9+59XO0cKj+VFzl71n38jfO3bIPpLDsPPd35sT87so04Kcv9cxWCyO284Dm83dr01NF6sawm3cwRjtYwJTeIJZNOjez/iB3yVYL6wd6431tk+1SoXmJk6FtX8MMh+8gg==</latexit> Da0 = {(xa0 k , ya0 k )|k = 1, ..., K}︓疑似データ <latexit sha1_base64="o7s+eg2azMUlvoD/hfS0w0PHhfk=">AAACkHichVHLLgRBFD3T3uM12EhsxIRYTe4IMbHx2ojVDMYjDKluhc70S3fNJEz8gB+wsDESC/EBPsDGD1j4BLEksbFwp6cTQXArVXXq1D236uTqnmUGiugxpjU0NjW3tLbF2zs6u7oTPb2rgVvyDZk3XMv113URSMt0ZF6ZypLrni+FrVtyTS/O1+7XytIPTNdZUUeeLNhi3zH3TEMopgpbtlAHhrAqGyfbYieRpBSFMfgTpCOQRBRZN3GLLezChYESbEg4UIwtCAQ8NpEGwWOugApzPiMzvJc4QZy1Jc6SnCGYLfK6z6fNiHX4XKsZhGqDX7F4+qwcxDA90DW90D3d0BO9/1qrEtao/eWId72uld5O92n/8tu/Kpt3hYNP1R8KnbP/9qSwh0zoxWRvXsjUXBr1+uXjs5flqaXhyghd0jP7q9Ij3bFDp/xqXOXk0jni3KD093b8BKtjqfREinLjyZm5qFWtGMAQRrkfk5jBArLI87uHOMMFqlqvltGmtdl6qhaLNH34EtriB92HlTk=</latexit> Ya <latexit 
sha1_base64="mmLwScYFwe4dMzysPNtZe9dr0gE=">AAAClXichVHLSiNBFD1pHc1kRm11McJsZII6q3AjiiIIYUbE3fiKD9QJ1W2pTfpFdyUQm/zA/MAsdKMQQfwAP8CNP+DCTxCXCm5czE2nYVBRb1FVp07dc6sO1/BtK1RE1ymtrf1DR2f6Y+bT567uHr23byX0KoEpi6Zne8GaIUJpW64sKkvZcs0PpHAMW64a5Z/N+9WqDELLc5dVzZdbjth1rR3LFIqpkq5vOkLtmcKO1uu/IzFSL+lZylEcgy9BPgFZJDHv6efYxDY8mKjAgYQLxdiGQMhjA3kQfOa2EDEXMLLie4k6MqytcJbkDMFsmdddPm0krMvnZs0wVpv8is0zYOUghuiKTumOLumMbujx1VpRXKP5lxrvRksr/VLPn4Glh3dVDu8Ke/9VbygMzn7bk8IOJmMvFnvzY6bp0mzVr+7/vVuaWhyKhumYbtnfEV3TBTt0q/dmY0EuHiDDDco/b8dLsDKay4/naGEsW/iRtCqNr/iG79yPCRQwh3kU+d0qDtHAifZFm9ZmtNlWqpZKNP14Etqvf0wylqc=</latexit> Ya0 ︓ から⽣成した正解⽂の集合 ︓ から⽣成した誤り⽂の集合 の⽣成は2つのステップで⾏う 1ステップ⽬ の⽣成 2ステップ⽬ の⽣成 ︓ に含まれる⽂の数 ︓分析者が決めるパラメータ <latexit sha1_base64="7DbRk2VapVNoyLlI68KCl91oIPA=">AAACnnichVHLLgRBFD3ae7wGG4kFMSFWk2qRECvxSGzEc8wkhkl1K1T0K901k9AZH+AHLKwICxG2PsDGD1j4BLEksbFwp6cTQXArVXXq1D236uQaniUDxdhjjVZbV9/Q2NScaGlta+9IdnatBW7RN0XGdC3Xzxk8EJZ0REZJZYmc5wtuG5bIGnvTlftsSfiBdJ1Vte+JDZvvOHJbmlwRVUj25G2udk1uhTPlzZBvhnnPl7YolwvJFEuzKPp/Aj0GKcSx6CZvkccWXJgowoaAA0XYAkdAYx06GDziNhAS5xOS0b1AGQnSFilLUAYndo/WHTqtx6xD50rNIFKb9IpF0ydlPwbZA7tkL+yeXbEn9v5rrTCqUfnLPu1GVSu8QsdRz8rbvyqbdoXdT9UfCoOy//aksI3xyIskb17EVFya1fqlg+OXlYnlwXCInbFn8nfKHtkdOXRKr+bFklg+QYIapH9vx0+QHUnro2ldXxpNTU7FvWpCLwYwTA0ZwyTmsIgMPXyIc1zjRuvTZrV5baGaqtXEmm58CS33AfYBmr8=</latexit> Da0 <latexit sha1_base64="REXXpASbaBlA0QBZgYRzWrLu17Q=">AAACjnichVFNLwNRFD3Gd30VG4lNoyFWza34ikQ0bJqwQBVJicyMV17MV2Zem9D4A/ZiISEkFuIH+AE2/oCFnyCWJDYWbqeTCII7mffOO/eeO3PeNTxLBorosU6rb2hsam5pjbW1d3R2xbt7VgO35Jsib7qW668beiAs6Yi8ksoS654vdNuwxJqxN1fNr5WFH0jXWVH7nti09R1HFqWpK6YK89O5DSVtESQWtuJJSlEYiZ8gHYEkolh047fYwDZcmCjBhoADxdiCjoCfAtIgeMxtosKcz0iGeYFDxFhb4irBFTqze7zu8KkQsQ6fqz2DUG3yVyx+fVYmMEgPdE0vdE839ETvv/aqhD2q/7LPu1HTCm+r66gv9/avyuZdYfdT9YfC4Oq/PSkUMRl6kezNC5mqS7PWv3xw8pKbWh6sDNElPbO/C3qkO3bolF/NqyWxfIoYDyj9fRw/wepIKj2WoqXRZGY2GlUL+jGAYZ7HBDLIYhH58EaPcYZzLa6Na9PaTK1Uq4s0vfgSWvYDU2qTpA==</latexit> K = S ⇥ L <latexit 
sha1_base64="EDeoju0uVhwDQxrW53I3Qu3f21s=">AAACh3ichVG7SgNBFD1Z3/EVtRFsgiFiIfGu+MIqamNhEY2JgorsrpNkcV/sbgIx+AMWtgpWChbiB/gBNv6AhZ8glgo2Ft7dLIiKeoeZOXPmnjtzuKpj6J5P9BiTWlrb2js6u+LdPb19/YmBwaJnV11NFDTbsN0tVfGEoVui4Ou+IbYcVyimaohN9WA5uN+sCdfTbWvDrzti11TKll7SNcUPqPxEcnUvkaIMhZH8CeQIpBBFzk7cYgf7sKGhChMCFnzGBhR4PLYhg+Awt4sGcy4jPbwXOEKctVXOEpyhMHvAa5lP2xFr8Tmo6YVqjV8xeLqsTCJND3RNL3RPN/RE77/WaoQ1gr/UeVebWuHs9R8P59/+VZm8+6h8qv5QqJz9tycfJcyHXnT25oRM4FJr1q8dnr7kF9bTjTG6pGf2d0GPdMcOrdqrdrUm1s8R5wbJ39vxExSnMvJMhtamU9mlqFWdGMEoxrkfc8hiBTkU+N0KTnCKM6lLmpRmpflmqhSLNEP4EtLiByuBkIQ=</latexit> S, L <latexit sha1_base64="BeGWtfNDXIzTpkAD57kmvMsIjBI=">AAACuXichVFNTxRBEH2MX7igLHIx8ULYgB7IpIZgJBoSohePwLqAYXDTMzTsuD0fmendZBnnD/gHTOQECQfCD/Bq4gW4e+AnEI+YePFg7ewkBAlQk556/bpedb+UEykv0UQnfcat23fu3uu/XxoYfPBwqDz8aCkJW7Era26ownjFEYlUXiBr2tNKrkSxFL6j5LLTfNM9X27LOPHC4J3uRHLNF5uBt+G5QjNVL0/bvtANV6j0ffYhFU+zWTu1nVCtJx2fU9rJ6s2c/9SctSZN05ys2lm9XCGT8hi9DKwCVFDEfFj+BhvrCOGiBR8SATRjBYGEv1VYIETMrSFlLmbk5ecSGUqsbXGV5ArBbJP/m7xbLdiA992eSa52+RbFK2blKMbpJ+3TGR3SAZ3S3yt7pXmP7ls6nJ2eVkb1oc+Pq39uVPmcNRrnqmsUDldf70ljAzO5F4+9RTnTden2+re3vpxVXy6OpxO0S7/Y3w6d0A92GLR/u3sLcnEbJR6Q9f84LoOlKdN6btLCdGXudTGqfjzBGJ7xPF5gDm8xjxrf+xXfcYRj45UhjIbxsVdq9BWaEVwII/kHaTqlMg==</latexit> Ya0 = {ya0 k |k = 1, ..., S} <latexit sha1_base64="Jx96DEly73QYtOkfetvmfaVHH58=">AAACuXichVFNTxRBEH2MIrh+sMLFxAthg3ggkxqC0UBIiFw4AuvCJgxueoaGHbbnIzO9G5dh/oB/wEROmHgw/gCvJlzAuwd+gvGIiRcO1M5OYpSINemp16/rVfdLOZHyEk10NmDcuDl4a2j4dunO3Xv3R8oPRteTsB27suaGKozrjkik8gJZ055Wsh7FUviOkhtOa6l3vtGRceKFwUvdjeSWL3YDb8dzhWaqUZ61faGbrlBpPXuViqlswU5tJ1TbSdfnlL7OGq2cP2gtWNOmaU5X7axRrpBJeYxfBVYBKihiJSx/ho1thHDRhg+JAJqxgkDC3yYsECLmtpAyFzPy8nOJDCXWtrlKcoVgtsX/Xd5tFmzA+17PJFe7fIviFbNyHJP0jT7SOZ3QJ/pOF//sleY9em/pcnb6Whk1Rt48rP76r8rnrNH8rbpG4XD19Z40dvA89+Kxtyhnei7dfv/O/tvz6tzaZPqY3tMP9ndEZ3TMDoPOT/fDqlw7RIkHZP09jqtgfca0npq0OltZfFGMahiPMIEnPI9nWMQyVlDje9/hC07x1Zg3hNE09vqlxkChGcMfYSSXZLylMA==</latexit> Xa0 = {xa0 k |k = 1, ..., S} <latexit 
sha1_base64="7DbRk2VapVNoyLlI68KCl91oIPA=">AAACnnichVHLLgRBFD3ae7wGG4kFMSFWk2qRECvxSGzEc8wkhkl1K1T0K901k9AZH+AHLKwICxG2PsDGD1j4BLEksbFwp6cTQXArVXXq1D236uQaniUDxdhjjVZbV9/Q2NScaGlta+9IdnatBW7RN0XGdC3Xzxk8EJZ0REZJZYmc5wtuG5bIGnvTlftsSfiBdJ1Vte+JDZvvOHJbmlwRVUj25G2udk1uhTPlzZBvhnnPl7YolwvJFEuzKPp/Aj0GKcSx6CZvkccWXJgowoaAA0XYAkdAYx06GDziNhAS5xOS0b1AGQnSFilLUAYndo/WHTqtx6xD50rNIFKb9IpF0ydlPwbZA7tkL+yeXbEn9v5rrTCqUfnLPu1GVSu8QsdRz8rbvyqbdoXdT9UfCoOy//aksI3xyIskb17EVFya1fqlg+OXlYnlwXCInbFn8nfKHtkdOXRKr+bFklg+QYIapH9vx0+QHUnro2ldXxpNTU7FvWpCLwYwTA0ZwyTmsIgMPXyIc1zjRuvTZrV5baGaqtXEmm58CS33AfYBmr8=</latexit> Da0 BERT-DAを⾏う場合 <latexit sha1_base64="mmLwScYFwe4dMzysPNtZe9dr0gE=">AAAClXichVHLSiNBFD1pHc1kRm11McJsZII6q3AjiiIIYUbE3fiKD9QJ1W2pTfpFdyUQm/zA/MAsdKMQQfwAP8CNP+DCTxCXCm5czE2nYVBRb1FVp07dc6sO1/BtK1RE1ymtrf1DR2f6Y+bT567uHr23byX0KoEpi6Zne8GaIUJpW64sKkvZcs0PpHAMW64a5Z/N+9WqDELLc5dVzZdbjth1rR3LFIqpkq5vOkLtmcKO1uu/IzFSL+lZylEcgy9BPgFZJDHv6efYxDY8mKjAgYQLxdiGQMhjA3kQfOa2EDEXMLLie4k6MqytcJbkDMFsmdddPm0krMvnZs0wVpv8is0zYOUghuiKTumOLumMbujx1VpRXKP5lxrvRksr/VLPn4Glh3dVDu8Ke/9VbygMzn7bk8IOJmMvFnvzY6bp0mzVr+7/vVuaWhyKhumYbtnfEV3TBTt0q/dmY0EuHiDDDco/b8dLsDKay4/naGEsW/iRtCqNr/iG79yPCRQwh3kU+d0qDtHAifZFm9ZmtNlWqpZKNP14Etqvf0wylqc=</latexit> Ya0 <latexit sha1_base64="la8LE71hJcTuThCSIhjC5TT+VGM=">AAAClXichVHdShtBFP7c2qqxNtv2wkJvgiHqVTgplYogSJXiXY0aE4gaZteJLu4fu5OALnkBX8ALvVFQKH0AH8AbX6AXPkLxUsEbLzzZLIgG9Qwz88035zszH8fwbStURJc92pvet+/6+gdSg++HPqT1j59WQq8RmLJkerYXVAwRSttyZUlZypYVP5DCMWxZNrZn2/flpgxCy3OX1Y4v1xyx6Vp1yxSKqZqurzpCbZnCjiqt9UiMtWp6lvIUR6YbFBKQRRILnn6GVWzAg4kGHEi4UIxtCIQ8qiiA4DO3hoi5gJEV30u0kGJtg7MkZwhmt3nd5FM1YV0+t2uGsdrkV2yeASszyNE/+kPXdEF/6T/dPVsrimu0/7LDu9HRSr+W3vuydPuqyuFdYetB9YLC4OyXPSnUMRl7sdibHzNtl2anfnN3/3ppajEXjdIxXbG/I7qkc3boNm/Mk6JcPECKG1R42o5usPItX5jIU/F7duZn0qp+fMUIxrkfPzCDeSygxO82cYgTnGrD2rQ2p/3qpGo9ieYzHoX2+x5KDJam</latexit> Xa0 <latexit 
sha1_base64="la8LE71hJcTuThCSIhjC5TT+VGM=">AAAClXichVHdShtBFP7c2qqxNtv2wkJvgiHqVTgplYogSJXiXY0aE4gaZteJLu4fu5OALnkBX8ALvVFQKH0AH8AbX6AXPkLxUsEbLzzZLIgG9Qwz88035zszH8fwbStURJc92pvet+/6+gdSg++HPqT1j59WQq8RmLJkerYXVAwRSttyZUlZypYVP5DCMWxZNrZn2/flpgxCy3OX1Y4v1xyx6Vp1yxSKqZqurzpCbZnCjiqt9UiMtWp6lvIUR6YbFBKQRRILnn6GVWzAg4kGHEi4UIxtCIQ8qiiA4DO3hoi5gJEV30u0kGJtg7MkZwhmt3nd5FM1YV0+t2uGsdrkV2yeASszyNE/+kPXdEF/6T/dPVsrimu0/7LDu9HRSr+W3vuydPuqyuFdYetB9YLC4OyXPSnUMRl7sdibHzNtl2anfnN3/3ppajEXjdIxXbG/I7qkc3boNm/Mk6JcPECKG1R42o5usPItX5jIU/F7duZn0qp+fMUIxrkfPzCDeSygxO82cYgTnGrD2rQ2p/3qpGo9ieYzHoX2+x5KDJam</latexit> Xa0 の⽣成 既存のデータ増強(Direct Noise)を採⽤ ▪ Direct Noise︓直接ノイズを加える⼿法[Zhao 2019] 以下の4つの操作により誤りを発⽣させる 置換 10%の確率でランダムな単語に置換 10%の確率で削除 10%の確率で後ろにランダムな単語を挿⼊ 正規分布の確率値に基づいて語順を⼊れ替え 削除 挿⼊ ⼊れ替え
  10. 14 /36 BERT-DAの位置付け D <latexit sha1_base64="yQ15yKUIijBVu2Sn2rcpwIxF7Sw=">AAACjnichVHLSsNQED3GV62PVt0IbsSiuCpT8YUgFt249NVaqFKSeNXQvEhuC7X0B9yLC0FRcCF+gB/gxh9w4SeIywpuXDhNA6JFnZB7zz0zZ5JzR3NNw5dEz21Ke0dnV3ekJ9rb1z8Qiw8OZX2n5Okiozum4+U01RemYYuMNKQpcq4nVEszxY5WXG3kd8rC8w3H3pYVV+xZ6qFtHBi6KpnK71qqPNJVs5qrFeIJSlIQY60gFYIEwlh34vfYxT4c6CjBgoANydiECp+fPFIguMztocqcx8gI8gI1RFlb4irBFSqzRV4P+ZQPWZvPjZ5+oNb5Kya/HivHMEFPdEt1eqQ7eqGPX3tVgx6Nf6nwrjW1wi3ETka23v9VWbxLHH2p/lBoXP23J4kDLAReDPbmBkzDpd7sXz4+q28tbk5UJ+maXtnfFT3TAzu0y2/6zYbYPEeUB5T6OY5WkJ1OpmaTtDGTSK+Eo4pgFOOY4nnMI401rCMT3OgpLnCpxJU5ZUlZbpYqbaFmGN9CWfsE8NKUZQ==</latexit> X <latexit sha1_base64="aDB5vRM9q14OAbvR6yJFJfpPV9E=">AAACjnichVHLSsNQED3GV62vqhvBTbEorspUfCGIohuXVq0PapEkXjU0L5LbQi39AffiQlAUXIgf4Ae48Qdc9BPEZQU3LpymAVGxTsi9556ZM8m5o7mm4UuiaovS2tbe0Rnpinb39Pb1xwYGt3yn4Okiozum4+1oqi9MwxYZaUhT7LieUC3NFNtafqWe3y4Kzzcce1OWXJGz1CPbODR0VTKV3bNUeayrZnm3sh9LUJKCiP8GqRAkEMaaE3vAHg7gQEcBFgRsSMYmVPj8ZJECwWUuhzJzHiMjyAtUEGVtgasEV6jM5nk94lM2ZG0+13v6gVrnr5j8eqyMY4ye6Y5q9ET39EIff/YqBz3q/1LiXWtohbvffzq88f6vyuJd4vhL1UShcXVzTxKHmAu8GOzNDZi6S73Rv3hyXtuYXx8rj9MNvbK/a6rSIzu0i2/6bVqsXyDKA0r9HMdvsDWZTE0nKT2VWFoORxXBCEYxwfOYxRJWsYZMcKNnuMSVElNmlAVlsVGqtISaIXwLZfUT8vOUZg==</latexit> Y

    D <latexit sha1_base64="uJqIsCsXDiNz+fiEsqPI1ft22eM=">AAACkXichVG7SgNBFD2urxhfMTaCjRgUqzArAcUqqIVgEx8xgRjD7Drqkn25OwloyA/4AylsjGAhfoAfYOMPWPgJYhnBxsKbzYJoUO8wM2fO3HNnDldzTcOXjD33KL19/QODkaHo8Mjo2HhsIr7nOxVPF1ndMR0vr3FfmIYtstKQpsi7nuCWZoqcVl5r3+eqwvMNx96VZ64oWvzYNo4MnUuiivsWlyc6N2vr9QNeiiVYkgUx0w3UECQQRsaJ3WMfh3CgowILAjYkYRMcPo0CVDC4xBVRI84jZAT3AnVESVuhLEEZnNgyrcd0KoSsTed2TT9Q6/SKSdMj5Qzm2BO7ZS32yO7YC/v4tVYtqNH+yxntWkcr3NL4xdTO+78qi3aJky/VHwqNsv/2JHGE5cCLQd7cgGm71Dv1q+eN1s7K9lxtnl2zV/LXZM/sgRza1Tf9ZktsXyJKDVJ/tqMb5BaTaiqpqlupRHo17FUE05jFAjVkCWlsIIMsPXyKBq7QVOLKspJWwlylJ9RM4lsom59PHZVZ</latexit> Da <latexit sha1_base64="yQ15yKUIijBVu2Sn2rcpwIxF7Sw=">AAACjnichVHLSsNQED3GV62PVt0IbsSiuCpT8YUgFt249NVaqFKSeNXQvEhuC7X0B9yLC0FRcCF+gB/gxh9w4SeIywpuXDhNA6JFnZB7zz0zZ5JzR3NNw5dEz21Ke0dnV3ekJ9rb1z8Qiw8OZX2n5Okiozum4+U01RemYYuMNKQpcq4nVEszxY5WXG3kd8rC8w3H3pYVV+xZ6qFtHBi6KpnK71qqPNJVs5qrFeIJSlIQY60gFYIEwlh34vfYxT4c6CjBgoANydiECp+fPFIguMztocqcx8gI8gI1RFlb4irBFSqzRV4P+ZQPWZvPjZ5+oNb5Kya/HivHMEFPdEt1eqQ7eqGPX3tVgx6Nf6nwrjW1wi3ETka23v9VWbxLHH2p/lBoXP23J4kDLAReDPbmBkzDpd7sXz4+q28tbk5UJ+maXtnfFT3TAzu0y2/6zYbYPEeUB5T6OY5WkJ1OpmaTtDGTSK+Eo4pgFOOY4nnMI401rCMT3OgpLnCpxJU5ZUlZbpYqbaFmGN9CWfsE8NKUZQ==</latexit> X <latexit sha1_base64="aDB5vRM9q14OAbvR6yJFJfpPV9E=">AAACjnichVHLSsNQED3GV62vqhvBTbEorspUfCGIohuXVq0PapEkXjU0L5LbQi39AffiQlAUXIgf4Ae48Qdc9BPEZQU3LpymAVGxTsi9556ZM8m5o7mm4UuiaovS2tbe0Rnpinb39Pb1xwYGt3yn4Okiozum4+1oqi9MwxYZaUhT7LieUC3NFNtafqWe3y4Kzzcce1OWXJGz1CPbODR0VTKV3bNUeayrZnm3sh9LUJKCiP8GqRAkEMaaE3vAHg7gQEcBFgRsSMYmVPj8ZJECwWUuhzJzHiMjyAtUEGVtgasEV6jM5nk94lM2ZG0+13v6gVrnr5j8eqyMY4ye6Y5q9ET39EIff/YqBz3q/1LiXWtohbvffzq88f6vyuJd4vhL1UShcXVzTxKHmAu8GOzNDZi6S73Rv3hyXtuYXx8rj9MNvbK/a6rSIzu0i2/6bVqsXyDKA0r9HMdvsDWZTE0nKT2VWFoORxXBCEYxwfOYxRJWsYZMcKNnuMSVElNmlAVlsVGqtISaIXwLZfUT8vOUZg==</latexit> Y <latexit 
sha1_base64="o7s+eg2azMUlvoD/hfS0w0PHhfk=">AAACkHichVHLLgRBFD3T3uM12EhsxIRYTe4IMbHx2ojVDMYjDKluhc70S3fNJEz8gB+wsDESC/EBPsDGD1j4BLEksbFwp6cTQXArVXXq1D236uTqnmUGiugxpjU0NjW3tLbF2zs6u7oTPb2rgVvyDZk3XMv113URSMt0ZF6ZypLrni+FrVtyTS/O1+7XytIPTNdZUUeeLNhi3zH3TEMopgpbtlAHhrAqGyfbYieRpBSFMfgTpCOQRBRZN3GLLezChYESbEg4UIwtCAQ8NpEGwWOugApzPiMzvJc4QZy1Jc6SnCGYLfK6z6fNiHX4XKsZhGqDX7F4+qwcxDA90DW90D3d0BO9/1qrEtao/eWId72uld5O92n/8tu/Kpt3hYNP1R8KnbP/9qSwh0zoxWRvXsjUXBr1+uXjs5flqaXhyghd0jP7q9Ij3bFDp/xqXOXk0jni3KD093b8BKtjqfREinLjyZm5qFWtGMAQRrkfk5jBArLI87uHOMMFqlqvltGmtdl6qhaLNH34EtriB92HlTk=</latexit> Ya <latexit sha1_base64="yRQD4IUJd1locc8tAuu9+fTnR0o=">AAACkHichVHNLkNBFP5c//VXtZHYiIZYNadCNDaKjVjVT2lCydxrcNP7595pExov4AUsbFRiIR7AA9h4AQuPIJYkNhZOb28iCM5kZr755nxn5svRPcsMFNFjk9bc0trW3tEZ6+ru6e2L9yfWA7fsGzJvuJbrF3QRSMt0ZF6ZypIFz5fC1i25oZcW6vcbFekHpuusqSNPFm2x75h7piEUU8UtW6gDQ1jVwsm22IknKUVhDP8E6QgkEUXOjd9iC7twYaAMGxIOFGMLAgGPTaRB8Jgrosqcz8gM7yVOEGNtmbMkZwhmS7zu82kzYh0+12sGodrgVyyePiuHMUoPdE0vdE839ETvv9aqhjXqfzniXW9opbfTdzq4+vavyuZd4eBT9YdC5+y/PSnsIRN6MdmbFzJ1l0ajfuX47GV1ZmW0OkaX9Mz+avRId+zQqbwaV8ty5RwxblD6ezt+gvWJVHoqRcuTyex81KoODGEE49yPaWSxiBzy/O4hznCBmpbQMtqsNtdI1ZoizQC+hLb0AdtklTg=</latexit> Xa D <latexit sha1_base64="uJqIsCsXDiNz+fiEsqPI1ft22eM=">AAACkXichVG7SgNBFD2urxhfMTaCjRgUqzArAcUqqIVgEx8xgRjD7Drqkn25OwloyA/4AylsjGAhfoAfYOMPWPgJYhnBxsKbzYJoUO8wM2fO3HNnDldzTcOXjD33KL19/QODkaHo8Mjo2HhsIr7nOxVPF1ndMR0vr3FfmIYtstKQpsi7nuCWZoqcVl5r3+eqwvMNx96VZ64oWvzYNo4MnUuiivsWlyc6N2vr9QNeiiVYkgUx0w3UECQQRsaJ3WMfh3CgowILAjYkYRMcPo0CVDC4xBVRI84jZAT3AnVESVuhLEEZnNgyrcd0KoSsTed2TT9Q6/SKSdMj5Qzm2BO7ZS32yO7YC/v4tVYtqNH+yxntWkcr3NL4xdTO+78qi3aJky/VHwqNsv/2JHGE5cCLQd7cgGm71Dv1q+eN1s7K9lxtnl2zV/LXZM/sgRza1Tf9ZktsXyJKDVJ/tqMb5BaTaiqpqlupRHo17FUE05jFAjVkCWlsIIMsPXyKBq7QVOLKspJWwlylJ9RM4lsom59PHZVZ</latexit> Da <latexit 
sha1_base64="yQ15yKUIijBVu2Sn2rcpwIxF7Sw=">AAACjnichVHLSsNQED3GV62PVt0IbsSiuCpT8YUgFt249NVaqFKSeNXQvEhuC7X0B9yLC0FRcCF+gB/gxh9w4SeIywpuXDhNA6JFnZB7zz0zZ5JzR3NNw5dEz21Ke0dnV3ekJ9rb1z8Qiw8OZX2n5Okiozum4+U01RemYYuMNKQpcq4nVEszxY5WXG3kd8rC8w3H3pYVV+xZ6qFtHBi6KpnK71qqPNJVs5qrFeIJSlIQY60gFYIEwlh34vfYxT4c6CjBgoANydiECp+fPFIguMztocqcx8gI8gI1RFlb4irBFSqzRV4P+ZQPWZvPjZ5+oNb5Kya/HivHMEFPdEt1eqQ7eqGPX3tVgx6Nf6nwrjW1wi3ETka23v9VWbxLHH2p/lBoXP23J4kDLAReDPbmBkzDpd7sXz4+q28tbk5UJ+maXtnfFT3TAzu0y2/6zYbYPEeUB5T6OY5WkJ1OpmaTtDGTSK+Eo4pgFOOY4nnMI401rCMT3OgpLnCpxJU5ZUlZbpYqbaFmGN9CWfsE8NKUZQ==</latexit> X <latexit sha1_base64="aDB5vRM9q14OAbvR6yJFJfpPV9E=">AAACjnichVHLSsNQED3GV62vqhvBTbEorspUfCGIohuXVq0PapEkXjU0L5LbQi39AffiQlAUXIgf4Ae48Qdc9BPEZQU3LpymAVGxTsi9556ZM8m5o7mm4UuiaovS2tbe0Rnpinb39Pb1xwYGt3yn4Okiozum4+1oqi9MwxYZaUhT7LieUC3NFNtafqWe3y4Kzzcce1OWXJGz1CPbODR0VTKV3bNUeayrZnm3sh9LUJKCiP8GqRAkEMaaE3vAHg7gQEcBFgRsSMYmVPj8ZJECwWUuhzJzHiMjyAtUEGVtgasEV6jM5nk94lM2ZG0+13v6gVrnr5j8eqyMY4ye6Y5q9ET39EIff/YqBz3q/1LiXWtohbvffzq88f6vyuJd4vhL1UShcXVzTxKHmAu8GOzNDZi6S73Rv3hyXtuYXx8rj9MNvbK/a6rSIzu0i2/6bVqsXyDKA0r9HMdvsDWZTE0nKT2VWFoORxXBCEYxwfOYxRJWsYZMcKNnuMSVElNmlAVlsVGqtISaIXwLZfUT8vOUZg==</latexit> Y <latexit sha1_base64="o7s+eg2azMUlvoD/hfS0w0PHhfk=">AAACkHichVHLLgRBFD3T3uM12EhsxIRYTe4IMbHx2ojVDMYjDKluhc70S3fNJEz8gB+wsDESC/EBPsDGD1j4BLEksbFwp6cTQXArVXXq1D236uTqnmUGiugxpjU0NjW3tLbF2zs6u7oTPb2rgVvyDZk3XMv113URSMt0ZF6ZypLrni+FrVtyTS/O1+7XytIPTNdZUUeeLNhi3zH3TEMopgpbtlAHhrAqGyfbYieRpBSFMfgTpCOQRBRZN3GLLezChYESbEg4UIwtCAQ8NpEGwWOugApzPiMzvJc4QZy1Jc6SnCGYLfK6z6fNiHX4XKsZhGqDX7F4+qwcxDA90DW90D3d0BO9/1qrEtao/eWId72uld5O92n/8tu/Kpt3hYNP1R8KnbP/9qSwh0zoxWRvXsjUXBr1+uXjs5flqaXhyghd0jP7q9Ij3bFDp/xqXOXk0jni3KD093b8BKtjqfREinLjyZm5qFWtGMAQRrkfk5jBArLI87uHOMMFqlqvltGmtdl6qhaLNH34EtriB92HlTk=</latexit> Ya <latexit 
sha1_base64="yRQD4IUJd1locc8tAuu9+fTnR0o=">AAACkHichVHNLkNBFP5c//VXtZHYiIZYNadCNDaKjVjVT2lCydxrcNP7595pExov4AUsbFRiIR7AA9h4AQuPIJYkNhZOb28iCM5kZr755nxn5svRPcsMFNFjk9bc0trW3tEZ6+ru6e2L9yfWA7fsGzJvuJbrF3QRSMt0ZF6ZypIFz5fC1i25oZcW6vcbFekHpuusqSNPFm2x75h7piEUU8UtW6gDQ1jVwsm22IknKUVhDP8E6QgkEUXOjd9iC7twYaAMGxIOFGMLAgGPTaRB8Jgrosqcz8gM7yVOEGNtmbMkZwhmS7zu82kzYh0+12sGodrgVyyePiuHMUoPdE0vdE839ETvv9aqhjXqfzniXW9opbfTdzq4+vavyuZd4eBT9YdC5+y/PSnsIRN6MdmbFzJ1l0ajfuX47GV1ZmW0OkaX9Mz+avRId+zQqbwaV8ty5RwxblD6ezt+gvWJVHoqRcuTyex81KoODGEE49yPaWSxiBzy/O4hznCBmpbQMtqsNtdI1ZoizQC+hLb0AdtklTg=</latexit> Xa <latexit sha1_base64="la8LE71hJcTuThCSIhjC5TT+VGM=">AAAClXichVHdShtBFP7c2qqxNtv2wkJvgiHqVTgplYogSJXiXY0aE4gaZteJLu4fu5OALnkBX8ALvVFQKH0AH8AbX6AXPkLxUsEbLzzZLIgG9Qwz88035zszH8fwbStURJc92pvet+/6+gdSg++HPqT1j59WQq8RmLJkerYXVAwRSttyZUlZypYVP5DCMWxZNrZn2/flpgxCy3OX1Y4v1xyx6Vp1yxSKqZqurzpCbZnCjiqt9UiMtWp6lvIUR6YbFBKQRRILnn6GVWzAg4kGHEi4UIxtCIQ8qiiA4DO3hoi5gJEV30u0kGJtg7MkZwhmt3nd5FM1YV0+t2uGsdrkV2yeASszyNE/+kPXdEF/6T/dPVsrimu0/7LDu9HRSr+W3vuydPuqyuFdYetB9YLC4OyXPSnUMRl7sdibHzNtl2anfnN3/3ppajEXjdIxXbG/I7qkc3boNm/Mk6JcPECKG1R42o5usPItX5jIU/F7duZn0qp+fMUIxrkfPzCDeSygxO82cYgTnGrD2rQ2p/3qpGo9ieYzHoX2+x5KDJam</latexit> Xa0 <latexit sha1_base64="mmLwScYFwe4dMzysPNtZe9dr0gE=">AAAClXichVHLSiNBFD1pHc1kRm11McJsZII6q3AjiiIIYUbE3fiKD9QJ1W2pTfpFdyUQm/zA/MAsdKMQQfwAP8CNP+DCTxCXCm5czE2nYVBRb1FVp07dc6sO1/BtK1RE1ymtrf1DR2f6Y+bT567uHr23byX0KoEpi6Zne8GaIUJpW64sKkvZcs0PpHAMW64a5Z/N+9WqDELLc5dVzZdbjth1rR3LFIqpkq5vOkLtmcKO1uu/IzFSL+lZylEcgy9BPgFZJDHv6efYxDY8mKjAgYQLxdiGQMhjA3kQfOa2EDEXMLLie4k6MqytcJbkDMFsmdddPm0krMvnZs0wVpv8is0zYOUghuiKTumOLumMbujx1VpRXKP5lxrvRksr/VLPn4Glh3dVDu8Ke/9VbygMzn7bk8IOJmMvFnvzY6bp0mzVr+7/vVuaWhyKhumYbtnfEV3TBTt0q/dmY0EuHiDDDco/b8dLsDKay4/naGEsW/iRtCqNr/iG79yPCRQwh3kU+d0qDtHAifZFm9ZmtNlWqpZKNP14Etqvf0wylqc=</latexit> Ya0 <latexit 
sha1_base64="7DbRk2VapVNoyLlI68KCl91oIPA=">AAACnnichVHLLgRBFD3ae7wGG4kFMSFWk2qRECvxSGzEc8wkhkl1K1T0K901k9AZH+AHLKwICxG2PsDGD1j4BLEksbFwp6cTQXArVXXq1D236uQaniUDxdhjjVZbV9/Q2NScaGlta+9IdnatBW7RN0XGdC3Xzxk8EJZ0REZJZYmc5wtuG5bIGnvTlftsSfiBdJ1Vte+JDZvvOHJbmlwRVUj25G2udk1uhTPlzZBvhnnPl7YolwvJFEuzKPp/Aj0GKcSx6CZvkccWXJgowoaAA0XYAkdAYx06GDziNhAS5xOS0b1AGQnSFilLUAYndo/WHTqtx6xD50rNIFKb9IpF0ydlPwbZA7tkL+yeXbEn9v5rrTCqUfnLPu1GVSu8QsdRz8rbvyqbdoXdT9UfCoOy//aksI3xyIskb17EVFya1fqlg+OXlYnlwXCInbFn8nfKHtkdOXRKr+bFklg+QYIapH9vx0+QHUnro2ldXxpNTU7FvWpCLwYwTA0ZwyTmsIgMPXyIc1zjRuvTZrV5baGaqtXEmm58CS33AfYBmr8=</latexit> Da0 データ増強なし Direct Noise(既存) BERT-DA(提案) GECモデル 学習 学習 学習 ︓擬似的に⽣成するデータ
  11. 15 /36 本⽇の発表内容 1. はじめに ー ⽂法誤り訂正とは ー 近年のGEC研究 ー

    既存のデータ増強 ー 問題設定と⽬的 2. BERT-DA ー 提案するデータ増強 ー BERT-DA のメリット ー BERT-DA の位置づけ 3. 実験 ー 実験⽬的 ー 実験設定 ー 要素1︓⽣成元データの実験結果 ー 要素2︓疑似データ⽣成量の実験結果 ー 要素3︓置換するトークンの実験結果 ー 既存システムとの性能⽐較 ー 誤りタイプごとの評価 ー 複数候補⽂での評価 4. おわりに ー まとめ ー 今後の課題
  12. 16 /36 実験⽬的 ⽬的(1) 既存モデルと提案モデル*の性能を評価しBERT-DAの有効性を⽰すこと *提案モデル︓BERT-DAを⾏ったGECモデル ⽬的(2) BERT-DAがより効果的に作⽤する知⾒**を得ること **効果的に作⽤する知⾒︓以下3つの要素に対して実験を⾏う 要素1︓⽣成元データ

    $Y_a$ をどのドメインのデータから生成するのが有効か? 要素2:データ生成量 パラメータ $L$ をどこまで増やせば性能向上が期待できるか? 要素3:置換するトークン どの品詞を置換するのが性能向上につながるか?
  13. 17 /36 要素1 ⽣成元データ ⽣成元データとして複数のドメインを持つデータで実験を⾏い、⽐較検討を⾏う 本研究では、BCCWJ[Maekawa 2014] から抽出した ・BCCWJ-LB・・・図書館書籍ドメイン ・BCCWJ-PB・・・出版書籍

    ドメイン ・BCCWJ-OC・・・Yahoo!知恵袋ドメイン という3種類のデータで⽐較検討 [Maekawa 2014] Kikuo Maekawa, Makoto Yamazaki, Toshinobu Ogiso, Takehiko Maruyama, Hideki Ogura, Wakako Kashino, Hanae Koiso, Masaya Yamaguchi, Makiro Tanaka, and Yasuharu Den. Balanced corpus of contemporary written japanese. Language resources and evaluation, Vol. 48, No. 2, pp. 345–371, 2014. 私 [MASK] 猫 が 好き です BERT L = 1 私 、 猫 が 好き です L = 2 私 も 猫 が 好き です L = 3 私 だって 猫 が 好き です L = 4 私 ⿊ 猫 が 好き です ︙ 学習に利⽤する どのドメインから⽣成する︖
  14. 18 /36 要素2 データ⽣成量 BERT-DAは、BERT[tohoku BERT] の Masked LM によってマスクされたトークンに対する予測トークンを⽤いてい

    るため,$L$ 番目までの出力候補の文を学習に利用することが可能 本研究では、$L = 0, 1, 2, 3, 4$ の場合を比較検討 [tohoku BERT] https://github.com/cl-tohoku/bert-japanese 私 [MASK] 猫 が 好き です BERT L = 1 私 、 猫 が 好き です L = 2 私 も 猫 が 好き です L = 3 私 だって 猫 が 好き です L = 4 私 黒 猫 が 好き です ︙ Lをどこまで増やす?
  15. 19 /36 要素3 置換するトークン どの品詞を置換してDAを⾏うことが性能向上につながるのか⽐較検討を⾏う 本研究では、 ・助詞 ・名詞 ・動詞 のトークンを置換した場合で⽐較検討

    私 [MASK] 猫 が 好き です BERT L = 1 私 、 猫 が 好き です L = 2 私 も 猫 が 好き です L = 3 私 だって 猫 が 好き です L = 4 私 ⿊ 猫 が 好き です ︙ どの品詞を置換する︖ 名詞︖助詞︖動詞︖ 助詞
  16. 20 /36 実験設定︓データセット, モデル, ⽐較⼿法 ・Lang8 ︓相互添削型SNSLang8から作成したコーパス ・NIL ︓NAIST誤⽤コーパス (⽇記ドメイン)

    ・TEC_JL :評価用マルチリファレンスコーパス (SNSドメイン) ・DIR :株式会社大和総研がクラウドソーシングで作成した評価用データ(金融ドメイン) ・JGECM* :日本語書き言葉均衡コーパス(BCCWJ)から作成した評価用データ(雑誌ドメイン) ・BCCWJ_LB, BCCWJ_PB, BCCWJ_OC :日本語書き言葉均衡コーパス(BCCWJ)から作成した生成元データ ▪データセット ▪データ増強を行った提案手法 GECモデルにはTransformerにCopy機構を組み込んだTransformerCopyを採用 ▪比較手法 ー SMTベースの手法: ・Moses[Koehn et al., 2007] ー NMTベースの手法: ・CNN ベースのモデル ・Bi-LSTM ベースのモデル ・データ増強しない通常のTransformerCopy[Zhao et al., 2019] ←ベースライン * https://github.com/hideyoshikato/JGECM
  17. 21 /36 実験設定︓評価指標 GLEU[Napoles 2015] ・機械翻訳の評価尺度BLEUをGECのために改良した評価指標 ・誤り⽂(src), 正解⽂(tgt), GECモデルによる出⼒⽂(out)の3つの データを使って評価する

    ︓集合間での の重なり数 : outがsrcよりも短い場合に減点する項 M2 scorerとGLEUという2種類の評価指標を⽤いる GLEUの値が⼤きいほど性能が良い ︓訂正した単語が正しく訂正できているか ︓訂正するべき箇所がどの程度正しく訂正できたか ︓precisionとrecallの調和平均。 =0.5を使⽤ ︓訂正を⾏ない正解だった箇所 ︓訂正を⾏なったが訂正する必要がなかった箇所 ︓訂正を⾏なわなかったが訂正が必要だった箇所 F値が⼤きいほど性能が良い <latexit sha1_base64="Hu+jjiUEstgi4gPtyRrYh6EpfIA=">AAAChXichVG7SgNBFD2urxgfidoINmJQbAw3Eh/YGLSx1GiioCHsrpO4ZLO77E4CGvwBsVULKwUL8QP8ABt/wMJPEMsINhbebBZEg/EOM3PmzD135nA1xzQ8SfTSoXR2dff0hvrC/QODQ5Ho8EjWsyuuLjK6bdrurqZ6wjQskZGGNMWu4wq1rJliRyutNe53qsL1DNvalkeOyJXVomUUDF2VTKWlk4/GKE5+TLSCRABiCGLDjj5gHwewoaOCMgQsSMYmVHg89pAAwWEuhxpzLiPDvxc4QZi1Fc4SnKEyW+K1yKe9gLX43Kjp+WqdXzF5uqycwBQ90x3V6Ynu6ZU+/6xV82s0/nLEu9bUCicfOR3b+vhXVeZd4vBb1UahcXZ7TxIFLPleDPbm+EzDpd6sXz2+rG8tp6dq03RDb+zvml7okR1a1Xf9dlOkrxDmBiV+t6MVZOfiifk4bSZjqdWgVSGMYxIz3I9FpLCODWT43QLOcI4LpVeZVZLKQjNV6Qg0o/gRysoX0I+QaQ==</latexit> tp <latexit sha1_base64="IBdiX/HP1JYXLLZLgRL+sw44fXw=">AAAChXichVG7SgNBFD2u7/iK2gg2waDYGO6KL2wUbSxNYlRQCbvrbFyyL3Y3AQ3+gNiqhZWChfgBfoCNP2CRTxBLBRsL724WRMV4h5k5c+aeO3O4qmsafkBUb5Fa29o7Oru6Ez29ff0DycGhTd+peJooaI7peNuq4gvTsEUhMAJTbLueUCzVFFtqeTW836oKzzcceyM4dMWepZRsQzc0JWAqp7vFZJoyFEXqN5BjkEYc607yHrvYhwMNFVgQsBEwNqHA57EDGQSXuT3UmPMYGdG9wDESrK1wluAMhdkyryU+7cSszeewph+pNX7F5OmxMoVxeqJbeqVHuqNn+vizVi2qEf7lkHe1oRVuceBkJP/+r8riPcDBl6qJQuXs5p4C6FiIvBjszY2Y0KXWqF89unjNL+bGaxN0TS/s74rq9MAO7eqbdpMVuUskuEHyz3b8BpvTGXk2Q9mZ9PJK3KoujGIMk9yPeSxjDeso8Ls6TnGGc6lTmpJmpLlGqtQSa4bxLaSlT7LBkFs=</latexit> fp <latexit 
sha1_base64="jofUA7Z1NcsQWrErD7CKrTAXKgg=">AAAChXichVG7SgNBFD2u7/iK2gg2waDYGO6KL2wUbSxNYlRQCbvrbFyyL3Y3AQ3+gNiqhZWChfgBfoCNP2CRTxBLBRsL724WRMV4h5k5c+aeO3O4qmsafkBUb5Fa29o7Oru6Ez29ff0DycGhTd+peJooaI7peNuq4gvTsEUhMAJTbLueUCzVFFtqeTW836oKzzcceyM4dMWepZRsQzc0JWAqp9vFZJoyFEXqN5BjkEYc607yHrvYhwMNFVgQsBEwNqHA57EDGQSXuT3UmPMYGdG9wDESrK1wluAMhdkyryU+7cSszeewph+pNX7F5OmxMoVxeqJbeqVHuqNn+vizVi2qEf7lkHe1oRVuceBkJP/+r8riPcDBl6qJQuXs5p4C6FiIvBjszY2Y0KXWqF89unjNL+bGaxN0TS/s74rq9MAO7eqbdpMVuUskuEHyz3b8BpvTGXk2Q9mZ9PJK3KoujGIMk9yPeSxjDeso8Ls6TnGGc6lTmpJmpLlGqtQSa4bxLaSlT66BkFk=</latexit> fn M2 scorer[Dahlmeier 2012] ・Precision, Recall, F-measure の総称 ・1 単語単位ではなく、なるべく⻑い単位で訂正が正しいかを評価する <latexit sha1_base64="p/dC1W1MMyqiojog7TISRffhnHQ=">AAACiHichVG7SgNBFD1Z3/EVtRFsgiFiFW5EiVpJbCyjMSpoCLvrqIOb3WV3EojBH7CxVLFSsBA/wA+w8Qcs8gliqWBj4c1mQTSod5iZM2fuuTOHa7iW9BVRI6J1dHZ19/T2RfsHBoeGYyOjG75T8UxRMB3L8bYM3ReWtEVBSWWJLdcTetmwxKZxuNy836wKz5eOva5qriiW9X1b7klTV0wVdgyh9FIsQSkKIt4O0iFIIIycE7vHDnbhwEQFZQjYUIwt6PB5bCMNgstcEXXmPEYyuBc4RpS1Fc4SnKEze8jrPp+2Q9bmc7OmH6hNfsXi6bEyjiQ90S290iPd0TN9/FqrHtRo/qXGu9HSCrc0fDKef/9XVeZd4eBL9YfC4Oy/PSnsYT7wItmbGzBNl2arfvXo7DW/uJasT9E1vbC/K2rQAzu0q2/mzapYu0SUG5T+2Y52sDGTSs+laHU2sZQNW9WLCUximvuRwRJWkEOB35U4xTkutKhGWkZbaKVqkVAzhm+hZT8BjyiRmw==</latexit>
  18. 22 /36 要素1︓⽣成元データの実験結果 設定︓事前学習に⽤いるデータ量は各⽣成元データから 300,000 ⽂を取得し,BERT-DA によりデータ増強し たデータ量 |D𝑎 ∪

    D𝑎′ | = 600, 000 に設定.ベースラインは,データ増強しないTransformerCopyを表す. ドメイン OC︓Yahoo知恵袋 PB︓出版書籍 LB︓図書館書籍
  19. 23 /36 要素1︓⽣成元データの実験結果 設定︓事前学習に⽤いるデータ量は各⽣成元データから 300,000 ⽂を取得し,BERT-DA によりデータ増強し たデータ量 |D𝑎 ∪

    D𝑎′ | = 600, 000 に設定.ベースラインは,データ増強しないTransformerCopyを表す. 考察︓評価⽤データと近い性質を持つコーパスで事前学習を⾏うことで,性能向上が期待できる可能性を⽰唆 出版書籍(PB) ドメインで学習すると多くの場合で性能が向上する ドメイン OC︓Yahoo知恵袋 PB︓出版書籍 LB︓図書館書籍
  20. 24 /36 要素2︓データ⽣成量の実験結果 設定︓ の場合を⽐較, L = 0はデータ増強を⾏わないモデル, は学習に⽤いるデータ 量を表す.

    から300,000⽂サンプリング <latexit sha1_base64="vWM8NryGzyeMb16QzdbaMwc7Lp4=">AAACkHichVHLSsNQEJ3GV62P1roR3BRLxUUpk1qxCGLVjYiLPuwDapEk3tbQNAlJWqjFH3AvLgRFwYX4A+7d+AMu+gnisoIbF07TgGixTkjuuefOOcnJiLoimxZi28UNDY+MjrnHPROTU9Ne34w/Z2p1Q2JZSVM0oyAKJlNklWUt2VJYQTeYUBMVlher293zfIMZpqyp+1ZTZ6WaUFHlsiwJFlHFvXUM8+FoeDkcO/QFMYJ2BfoB74AgOJXUfI9wAEeggQR1qAEDFSzCCghg0lUEHhB04krQIs4gJNvnDE7BQ9o6dTHqEIit0rNCu6LDqrTvepq2WqK3KHQbpAxACF/wHjv4jA/4ip9/erVsj+63NGkVe1qmH3rP5jIf/6pqtFpw/K0aoBCpe3AmC8oQt7PIlE23mW5KqeffOLnoZNbSodYi3uIb5bvBNj5RQrXxLt2lWPqS3D00Iv73QPpBLhrhVyKYigUTW86w3DAPC7BEE1mFBOxAErL2Pz2HK7jm/Fyc2+A2e62cy9HMwo/idr8AA0eSJw==</latexit> L = 0, 1, 2, 3, 4 <latexit sha1_base64="7DbRk2VapVNoyLlI68KCl91oIPA=">AAACnnichVHLLgRBFD3ae7wGG4kFMSFWk2qRECvxSGzEc8wkhkl1K1T0K901k9AZH+AHLKwICxG2PsDGD1j4BLEksbFwp6cTQXArVXXq1D236uQaniUDxdhjjVZbV9/Q2NScaGlta+9IdnatBW7RN0XGdC3Xzxk8EJZ0REZJZYmc5wtuG5bIGnvTlftsSfiBdJ1Vte+JDZvvOHJbmlwRVUj25G2udk1uhTPlzZBvhnnPl7YolwvJFEuzKPp/Aj0GKcSx6CZvkccWXJgowoaAA0XYAkdAYx06GDziNhAS5xOS0b1AGQnSFilLUAYndo/WHTqtx6xD50rNIFKb9IpF0ydlPwbZA7tkL+yeXbEn9v5rrTCqUfnLPu1GVSu8QsdRz8rbvyqbdoXdT9UfCoOy//aksI3xyIskb17EVFya1fqlg+OXlYnlwXCInbFn8nfKHtkdOXRKr+bFklg+QYIapH9vx0+QHUnro2ldXxpNTU7FvWpCLwYwTA0ZwyTmsIgMPXyIc1zjRuvTZrV5baGaqtXEmm58CS33AfYBmr8=</latexit> Da0
  21. 25 /36 要素2︓データ⽣成量の実験結果 設定︓ の場合を⽐較, L = 0はデータ増強を⾏わないモデル, は学習に⽤いるデータ 量を表す.

    から300,000⽂サンプリング L=2で学習すると性能が向上する 考察︓𝐿 の数を増やすほど予測確率が低いトークンに置換された⽂が⽣成されるため,正しい⽂の⽂構造の崩 壊を招きやすくなる可能性を⽰唆 <latexit sha1_base64="vWM8NryGzyeMb16QzdbaMwc7Lp4=">AAACkHichVHLSsNQEJ3GV62P1roR3BRLxUUpk1qxCGLVjYiLPuwDapEk3tbQNAlJWqjFH3AvLgRFwYX4A+7d+AMu+gnisoIbF07TgGixTkjuuefOOcnJiLoimxZi28UNDY+MjrnHPROTU9Ne34w/Z2p1Q2JZSVM0oyAKJlNklWUt2VJYQTeYUBMVlher293zfIMZpqyp+1ZTZ6WaUFHlsiwJFlHFvXUM8+FoeDkcO/QFMYJ2BfoB74AgOJXUfI9wAEeggQR1qAEDFSzCCghg0lUEHhB04krQIs4gJNvnDE7BQ9o6dTHqEIit0rNCu6LDqrTvepq2WqK3KHQbpAxACF/wHjv4jA/4ip9/erVsj+63NGkVe1qmH3rP5jIf/6pqtFpw/K0aoBCpe3AmC8oQt7PIlE23mW5KqeffOLnoZNbSodYi3uIb5bvBNj5RQrXxLt2lWPqS3D00Iv73QPpBLhrhVyKYigUTW86w3DAPC7BEE1mFBOxAErL2Pz2HK7jm/Fyc2+A2e62cy9HMwo/idr8AA0eSJw==</latexit> L = 0, 1, 2, 3, 4 <latexit sha1_base64="7DbRk2VapVNoyLlI68KCl91oIPA=">AAACnnichVHLLgRBFD3ae7wGG4kFMSFWk2qRECvxSGzEc8wkhkl1K1T0K901k9AZH+AHLKwICxG2PsDGD1j4BLEksbFwp6cTQXArVXXq1D236uQaniUDxdhjjVZbV9/Q2NScaGlta+9IdnatBW7RN0XGdC3Xzxk8EJZ0REZJZYmc5wtuG5bIGnvTlftsSfiBdJ1Vte+JDZvvOHJbmlwRVUj25G2udk1uhTPlzZBvhnnPl7YolwvJFEuzKPp/Aj0GKcSx6CZvkccWXJgowoaAA0XYAkdAYx06GDziNhAS5xOS0b1AGQnSFilLUAYndo/WHTqtx6xD50rNIFKb9IpF0ydlPwbZA7tkL+yeXbEn9v5rrTCqUfnLPu1GVSu8QsdRz8rbvyqbdoXdT9UfCoOy//aksI3xyIskb17EVFya1fqlg+OXlYnlwXCInbFn8nfKHtkdOXRKr+bFklg+QYIapH9vx0+QHUnro2ldXxpNTU7FvWpCLwYwTA0ZwyTmsIgMPXyIc1zjRuvTZrV5baGaqtXEmm58CS33AfYBmr8=</latexit> Da0
  22. 27 /36 要素3︓置換するトークンの実験結果 設定︓⽇本語の誤りとして上位の助詞,名詞,動詞を⽐較.事前学習に⽤いるデータ量は各⽣成元データから 300,000 ⽂を取得し,BERT-DA によりデータ増強したデータ量 |D𝑎 ∪ D𝑎′

    | = 600,000 に設定.ベースラインは,データ増強しないTransformerCopyを表す. 多くの場合,助詞を置換したデータセットで学習すると性能が向上する 考察:大きな差異はない.置換するトークンの品詞による違いは少ない可能性を示唆.評価データに助詞の誤りが多いためと考えられる.
  23. 28 /36 既存システムとの性能⽐較 設定︓ これまでの実験の結果から (1) ⽣成元データは PB(出版書籍ドメイン) を⽤いる (2)

    データ⽣成量は 𝐿 = 2 で学習する という2つの知⾒によるモデルが既存システムの性能を上回る 注)Trans+BERT_lang8・・・事前学習をlang8で⾏ったモデル
  24. 30 /36 誤りタイプごとの評価 ⾦融ドメイン評価データ(DIR) 誤りタイプ delete_joshi ︓助詞の削除 insert_joshi ︓助詞の挿⼊ replace_joshi︓助詞の置換

    replace_kanji：漢字の置換 ・提案手法の長所： 助詞の削除、挿入、置換の誤りに強い →学習に用いているlang8は言語学習者のデータで、助詞誤りが最も多いから ・提案手法の短所： 漢字の置換に弱い → 学習データに漢字誤りが少ない、または評価データが現実的ではない誤りを付与している可能性 順番直す
  25. 31 /36 誤りタイプごとの評価 新聞ドメイン評価データ(JGECM) 誤りタイプ delete ︓助詞・動詞以外の削除 doshi ︓動詞の誤り goi

    ：語彙選択 hyoki ：表記誤り insert_joshi ：助詞の挿入 insert ：助詞・動詞以外の挿入 replace_joshi ：助詞の置換 ・提案手法の長所： 新たな誤りタイプに対してもベースラインより高い性能 →誤りタイプを限定しないモデルだから ・提案手法の短所： 語彙選択の誤りの性能が低い
  26. 32 /36 複数候補文での評価 ：誤り箇所 ：正解箇所 候補5(@5)まで出力すれば、高い性能で訂正が可能 助詞の削除 助詞の挿入 置換(助詞) 漢字の置換

    Precision Recall F-measure @4で正解が出力される Model Trans Trans+BERT_lang8 Trans+BERT_PB Precision Recall F-measure Precision Recall F-measure Precision Recall F-measure
  27. 33 /36 まとめ 成果 提案⼿法 問題点 ⼤規模な並列データが少ない BERTのMasked LMを⽤いたデータ増強(BERT-DA)を提案しGECモデルの性能向上に取組んだ ・複数の評価指標、複数ドメイン,

    複数の誤りタイプ下での実験により⽇本語GECとして最⾼性能 ・複数ドメイン,複数誤りタイプ下での⽇本語評価データの構築 https://github.com/hideyoshikato/JGECM ・他の事前学習済み BERT での性能⽐較 ・BERT-DA のデータ量をより増やした場合の性能⽐較 ・既存の他のデータ増強(Back Translation)での実験 今後の課題
  28. 34 /36 参考⽂献 [Wang 2020] Wang, Y., Wang, Y., Liu,

    J., Liu, Z. (2020). A Comprehensive Survey of Grammar Error Correction. arXiv preprint arXiv:2005.06600. [Mizumoto 2012] Mizumoto, T., Komachi, M., Nagata, M., & Matsumoto, Y. (2011, November). Mining revision log of language learning SNS for automated Japanese error correction of second language learners. In Proceedings of 5th International Joint Conference on Natural Language Processing (pp. 147-155). [Kiyono 2019] Kiyono, S., Suzuki, J., Mita, M., Mizumoto, T., & Inui, K. (2019). An empirical study of incorporating pseudo data into grammatical error correction. arXiv preprint arXiv:1909.00502. [Koyama 2020] Koyama, A., Kiyuna, T., Kobayashi, K., Arai, M., Komachi, M. (2020, May). Construction of an Evaluation Corpus for Grammatical Error Correction for Learners of Japanese as a Second Language. In Proceedings of The 12th Language Resources and Evaluation Conference (pp. 204-211). [Grundkiewicz] R. Grundkiewicz et al. “Neural Grammatical Error Correction Systems with Unsupervised Pre-training on Synthetic Data”. In: BEA. 2019, pp. 252–263. [Zhao 2019] W. Zhao et al. “Improving Grammatical Error Correction via Pre-Training a Copy-Augmented Architecture with Unlabeled Data”. In: NAACL. 2019. [Vaswani 2017] Vaswani, A., Shazeer, N., Parmar, N., Uszkoreit, J., Jones, L., Gomez, A. N., ... & Polosukhin, I. (2017). Attention is all you need. In Advances in neural information processing systems (pp. 5998-6008). [Devlin 2018] Devlin, J., Chang, M. W., Lee, K., & Toutanova, K. (2018). Bert: Pre-training of deep bidirectional transformers for language understanding. arXiv preprint arXiv:1810.04805. [Maekawa 2014] Kikuo Maekawa, Makoto Yamazaki, Toshinobu Ogiso, Takehiko Maruyama, Hideki Ogura, Wakako Kashino, Hanae Koiso, Masaya Yamaguchi, Makiro Tanaka, and Yasuharu Den. Balanced corpus of contemporary written Japanese. Language resources and evaluation, Vol. 48, No. 2, pp. 345–371, 2014. 
[tohoku BERT] https://github.com/cl-tohoku/bert-japanese [ogawa 2019] https://github.com/youichiro/transformer-copy
  29. 35 /36 参考⽂献 [Dahlmeier 2012] Daniel Dahlmeier and Hwee Tou

    Ng. Better evaluation for grammatical error correction. In Proceedings of the 2012 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, pp. 568–572, Montréal, Canada, June 2012. Association for Computational Linguistics. [Napoles 2015] Courtney Napoles, Keisuke Sakaguchi, Matt Post, and Joel Tetreault. Ground truth for grammatical error correction metrics. In Proceedings of the 53rd Annual Meeting of the Association for Computational Linguistics and the 7th International Joint Conference on Natural Language Processing (Volume 2: Short Papers), pp. 588–593, Beijing, China, July 2015. Association for Computational Linguistics.
  30. 36 /36 誤りタイプ⼀覧 ▪削除(助詞) (例)「英語がわかる」→「英語[]わかる」 ▪挿⼊(助詞) (例)「英語がわかる」→「英語が[を]わかる」 ▪[置換(助詞)] (例)「英語がわかる」→「英語[に]わかる」 ▪[語彙選択]

    文の中に含まれる単語を，ふさわしくない文字または単語に置換する誤り． (例1) 「権利を尊重する」→「権利を尊[敬]する」 (例2) 「常に外国の援助がいる」→「[まいにち]外国の援助がいる」 ▪[表記] 文に含まれる単語に対する漢字変換，平仮名，カタカナ，英字・日本語変換ミス，適切でない言い換えなどに関する誤り (例1：漢字変換ミス) 「異議を申し立てる」→「[意義]を申し立てる」 (例2：平仮名) 「ねんぱいの人」→「[ねんぱ]の人」 (例3：カタカナ) 「レストランを離れる」→「[レストラ]を離れる」 (例4：英字・日本語変換ミス) 「8%から10%へ引き上げる」→「8%[kara]10%へ引き上げる」 ▪[動詞] (例1：削除)「レストランを離れる。」→「レストランを離れ[]。」 (例2：挿入)「手紙を書かない」→「手紙を書か[か]ない」 (例3：置換)「大きくなりました」→「大きく[され]ました」 ▪[削除(助詞・動詞以外)] (例1) 「タバコを吸うことは悪いことです」→「タバコを吸う[]は悪いことです」 (例2) 「同志社大学を離れる」→「同志社[]学を離れる」 ▪[挿入(助詞・動詞以外)] (例1) 「犬は可愛い」→「犬は可愛い[い]」 (例2) 「ご飯を食べます」→「[お]ご飯を食べます」
  31. 89 /36 $Y_{a'} = \{y_k^{a'} \mid k = 1, \dots, S\}$

    $X_{a'} = \{x_k^{a'} \mid k = 1, \dots, S\}$ 提案するデータ増強(BERT-DA) D $D_a$ GECモデル 学習 $X$ $Y$ $Y_a$ $X_a$ $X_{a'}$ $Y_{a'}$ $D_{a'}$ $D_{a'} = \{(x_k^{a'}, y_k^{a'}) \mid k = 1, \dots, K\}$：疑似データ $Y_a$ $Y_{a'}$ ： から生成した正解文の集合 ： から生成した誤り文の集合 生成元データ ( のみで構成) $Y_a$ a 既存データ増強 (Direct Noiseなど) $Y_a$ $X_a$ ▪ Direct Noise：直接ノイズを加える手法 以下の4つの操作により誤りを発生させる 置換 10%の確率でランダムな単語に置換 10%の確率で削除 10%の確率で後ろにランダムな単語を挿入 正規分布の確率値に基づいて語順を入れ替え 削除 挿入 入れ替え ： に含まれる文の数 ：分析者が決めるパラメータ $D_{a'}$ $K = S \times L$ $S, L$ BERT-DAを行う場合