joinコマンドを中心とした、キーを元に2つのテキストデータを連結する処理。実際に業務で必要に迫られた事例も。イベントレポートブログはこちら。 http://papiro.hatenablog.jp/entry/2017/04/24/002533
ςΩετσʔλͷ࿈݁USP༑ͷձ @papiron
View Slide
RDBͷςʔϒϧ࿈݁• ෳͷςʔϒϧΛ࿈݁• SQLͷJOINͰ• PKͱFK• ෦݁߹ɺ֎෦݁߹ɺશ݁߹
ςΩετϑΝΠϧͷ࿈݁• ෳͷςΩετϑΝΠϧΛ࿈݁• joinίϚϯυͳͲͰ• ಛఆͷΧϥϜΛΩʔ• ෦݁߹ɺ֎෦݁߹ɺશ݁߹
࿈݁ʹؔ͢ΔίϚϯυ• joinίϚϯυ• pasteίϚϯυ• TukubaiͷjoinܥɺloopjͳͲ• ͦͷଞิॿతͳίϚϯυ• commͳͲ
͓• ͓1• ϚελͱτϥϯβΫγϣϯͷ࿈݁• ෦݁߹• ͓2• ิΛ͏σʔλͷ࿈݁• ֎෦݁߹͓2࣮ࡍͷۀͰඞཁʹഭΒΕͨࣄྫ
͓101 suzuki02 saitou03 yamada04 isono05 toudou0001 05 20170101 6000002 04 20170101 8000003 01 20170101 5000004 02 20170102 9000005 03 20170102 8000006 02 20170102 1000007 02 20170102 9000008 05 20170102 6000009 03 20170103 4000010 01 20170103 2000011 03 20170103 9000012 03 20170103 9000013 01 20170103 5000014 03 20170103 6000015 04 20170103 9000016 05 20170104 7000017 04 20170104 3000018 02 20170104 7000019 05 20170104 4000020 01 20170104 700MASTERid,୲ऀTRANid,୲ऀid,,ൢചֹۚ
͓1.1$ sort -s -k2,2 TRAN | join -1 1 -2 2 MASTER -01 suzuki 0003 20170101 50001 suzuki 0010 20170103 20001 suzuki 0013 20170103 50001 suzuki 0020 20170104 70002 saitou 0004 20170102 90002 saitou 0006 20170102 10002 saitou 0007 20170102 90002 saitou 0018 20170104 70003 yamada 0005 20170102 80003 yamada 0009 20170103 40003 yamada 0011 20170103 90003 yamada 0012 20170103 90003 yamada 0014 20170103 60004 isono 0002 20170101 80004 isono 0015 20170103 90004 isono 0017 20170104 30005 toudou 0001 20170101 60005 toudou 0008 20170102 60005 toudou 0016 20170104 70005 toudou 0019 20170104 400جຊతͳ݁߹ʢΩʔɺ1൪ͷϑΝΠϧɺ2൪ͷϑΝΠϧͷॱ൪ͰฒͿʣ࿈݁͢ΔσʔλΩʔͰฒସ͑Λ͢Δඞཁ͋Γ
͓1.1a$ sort -s -k2,2 TRAN | join1 key=2 MASTER -0003 01 suzuki 20170101 5000010 01 suzuki 20170103 2000013 01 suzuki 20170103 5000020 01 suzuki 20170104 7000004 02 saitou 20170102 9000006 02 saitou 20170102 1000007 02 saitou 20170102 9000018 02 saitou 20170104 7000005 03 yamada 20170102 8000009 03 yamada 20170103 4000011 03 yamada 20170103 9000012 03 yamada 20170103 9000014 03 yamada 20170103 6000002 04 isono 20170101 8000015 04 isono 20170103 9000017 04 isono 20170104 3000001 05 toudou 20170101 6000008 05 toudou 20170102 6000016 05 toudou 20170104 7000019 05 toudou 20170104 400Tukubaiͷjoin1Λ͏ʢϚελ͕ࢦఆͨ͠Ωʔͷ࣍ͷΧϥϜ݁߹ʣ࿈݁͢ΔσʔλΩʔͰฒସ͑Λ͢Δඞཁ͋Γ
͓1.1b$ cjoin1 key=2 MASTER TRAN0001 05 toudou 20170101 6000002 04 isono 20170101 8000003 01 suzuki 20170101 5000004 02 saitou 20170102 9000005 03 yamada 20170102 8000006 02 saitou 20170102 1000007 02 saitou 20170102 9000008 05 toudou 20170102 6000009 03 yamada 20170103 4000010 01 suzuki 20170103 2000011 03 yamada 20170103 9000012 03 yamada 20170103 9000013 01 suzuki 20170103 5000014 03 yamada 20170103 6000015 04 isono 20170103 9000016 05 toudou 20170104 7000017 04 isono 20170104 3000018 02 saitou 20170104 7000019 05 toudou 20170104 4000020 01 suzuki 20170104 700Tukubaiͷcjoin1Λ͏ʢϚελ͕ࢦఆͨ͠Ωʔͷ࣍ͷΧϥϜ݁߹ʣ࿈݁͢ΔσʔλΩʔͰฒସ͑Λ͢Δඞཁ͕ͳ͍
͓1.2$ sort -s -k2,2 TRAN | join -1 1 -2 2 -o 2.1,0,1.2,2.3,2.4 MASTER -0003 01 suzuki 20170101 5000010 01 suzuki 20170103 2000013 01 suzuki 20170103 5000020 01 suzuki 20170104 7000004 02 saitou 20170102 9000006 02 saitou 20170102 1000007 02 saitou 20170102 9000018 02 saitou 20170104 7000005 03 yamada 20170102 8000009 03 yamada 20170103 4000011 03 yamada 20170103 9000012 03 yamada 20170103 9000014 03 yamada 20170103 6000002 04 isono 20170101 8000015 04 isono 20170103 9000017 04 isono 20170104 3000001 05 toudou 20170101 6000008 05 toudou 20170102 6000016 05 toudou 20170104 7000019 05 toudou 20170104 400ΧϥϜͷฒͼॱΛࢦఆͯ݁͠߹ʢTRANͷ୲ऀidͷՕॴʹ୲ऀΛೖΕΔʣ-oΦϓγϣϯͰΩʔͷΧϥϜ0ͱࢦఆTukubaiͷjoin1cjoin1ͱಉ͡ฒͼΛࢦఆ
͓1.3$ awk '{a[$2" "$3]+=$4}END{for(v in a)print v,a[v] }' TRAN| sort -s -k1,1 | join -1 1 -2 1 MASTER -01 suzuki 20170101 50001 suzuki 20170103 70001 suzuki 20170104 70002 saitou 20170102 190002 saitou 20170104 70003 yamada 20170102 80003 yamada 20170103 280004 isono 20170101 80004 isono 20170103 90004 isono 20170104 30005 toudou 20170101 60005 toudou 20170102 60005 toudou 20170104 1100୲ऀͱച্ຖʹूܭʢawkͷ࿈ྻΛར༻ʣawkͷ࿈ྻforϧʔϓͰग़ྗ͢Δࡍฒସ͕͑ඞཁ
͓1.3a$ sort -s -k2,2 TRAN | sm2 2 3 4 4 | join1 key=1 MASTER01 suzuki 20170101 50001 suzuki 20170103 70001 suzuki 20170104 70002 saitou 20170102 190002 saitou 20170104 70003 yamada 20170102 80003 yamada 20170103 280004 isono 20170101 80004 isono 20170103 90004 isono 20170104 30005 toudou 20170101 60005 toudou 20170102 60005 toudou 20170104 1100୲ऀͱച্ຖʹूܭʢTukubaiͷsm2ͱjoin1Λར༻ʣूܭॲཧ͕sm2ίϚϯυͰεοΩϦॻ͚Δ
͓2 01 xxxx.0c4d.0c4402 xxxx.0d46.f3c303 xxxx.0d17.731304 xxxx.0d81.33a805 xxxx.0d17.965806 xxxx.0c4d.09dc07 xxxx.0a69.b7b108 xxxx.0d81.1dac09 xxxx.0fff.d82810 xxxx.0d17.747711 xxxx.0d46.33c312 xxxx.0148.3c5813 xxxx.0d46.f3c614 xxxx.0d81.1c9c15 xxxx.04f8.c8e516 xxxx.0d46.7eb617 xxxx.0d42.5cc418 xxxx.108b.cc1c19 xxxx.0fff.ce9420 xxxx.02b6.4c24xxxx.17d0.2c07 1914xxxx.0d81.33a8 2353xxxx.0d81.1c9c 2275xxxx.0d46.33c3 2500xxxx.0fff.d828 3766xxxx.13a2.8858 3235xxxx.13aa.091e 3119xxxx.0d81.1dac 3382xxxx.0c4d.0c44 3618xxxx.0d46.7eb6 3443xxxx.0d46.f3c3 3431xxxx.1301.bac4 1607xxxx.0d46.f3c6 3063xxxx.04f8.c8e5 3120xxxx.0d55.9b9f 3119DEVICELISTid,MACΞυϨεMEASUREMENTMACΞυϨε,ܭଌMACΞυϨεޓ͍ʹΓͳ͍ͷ͕͋Δ
͓2.1$ comm <(awk '{print $2}' DEVICELIST | sort) <(awk '{print $1}' MEASUREMENT | sort)xxxx.0148.3c58xxxx.02b6.4c24xxxx.04f8.c8e5xxxx.0a69.b7b1xxxx.0c4d.09dcxxxx.0c4d.0c44xxxx.0d17.7313xxxx.0d17.7477xxxx.0d17.9658xxxx.0d42.5cc4xxxx.0d46.33c3xxxx.0d46.7eb6xxxx.0d46.f3c3xxxx.0d46.f3c6xxxx.0d55.9b9fxxxx.0d81.1c9cxxxx.0d81.1dacxxxx.0d81.33a8xxxx.0fff.ce94xxxx.0fff.d828xxxx.108b.cc1cxxxx.1301.bac4xxxx.13a2.8858xxxx.13aa.091exxxx.17d0.2c07commίϚϯυͰ֬ೝ͢Δ࣌ฒସ͕͑ඞཁୈҰΧϥϜɹ1ͭͷϑΝΠϧͷΈʹଘࡏ͢ΔߦୈೋΧϥϜɹ2ͭͷϑΝΠϧͷΈʹଘࡏ͢ΔߦୈࡾΧϥϜɹ2ͭͷϑΝΠϧʹڞ௨ͯ͠ଘࡏ͢Δߦޓ͍ʹڞ௨͢ΔΩʔͱΓͳ͍Ωʔͷ֬ೝ
͓2.2$ join -a 1 -1 2 <(sort -s -k2,2 DEVICELIST) <(sort -k1,1 MEASUREMENT) | sort-k2,2 | awk '{print $1,$3}'xxxx.0c4d.0c44 3618xxxx.0d46.f3c3 3431xxxx.0d17.7313xxxx.0d81.33a8 2353xxxx.0d17.9658xxxx.0c4d.09dcxxxx.0a69.b7b1xxxx.0d81.1dac 3382xxxx.0fff.d828 3766xxxx.0d17.7477xxxx.0d46.33c3 2500xxxx.0148.3c58xxxx.0d46.f3c6 3063xxxx.0d81.1c9c 2275xxxx.04f8.c8e5 3120xxxx.0d46.7eb6 3443xxxx.0d42.5cc4xxxx.108b.cc1cxxxx.0fff.ce94xxxx.02b6.4c24LEFT JOIN૬ͷ֎෦݁߹ʢjoinίϚϯυͷ-aΦϓγϣϯΛར༻ʣMacFreeBSDͷํgjoinίϚϯυͰDEVICELISTͷid൪߸Λͬͯݩͷॱ൪ʹฒସ͍͑ͯΔ࠷ޙʹid൪߸Λআ
͓2.2a$ join2 key=1 <(sort -k1,1 MEASUREMENT) <(sort -s -k2,2 DEVICELIST | self 2 1)| sort -k3,3 | delf 3xxxx.0c4d.0c44 3618xxxx.0d46.f3c3 3431xxxx.0d17.7313 _xxxx.0d81.33a8 2353xxxx.0d17.9658 _xxxx.0c4d.09dc _xxxx.0a69.b7b1 _xxxx.0d81.1dac 3382xxxx.0fff.d828 3766xxxx.0d17.7477 _xxxx.0d46.33c3 2500xxxx.0148.3c58 _xxxx.0d46.f3c6 3063xxxx.0d81.1c9c 2275xxxx.04f8.c8e5 3120xxxx.0d46.7eb6 3443xxxx.0d42.5cc4 _xxxx.108b.cc1c _xxxx.0fff.ce94 _xxxx.02b6.4c24 _LEFT JOIN૬ͷ֎෦݁߹ʢTukubaiͷjoin2ίϚϯυΛར༻ʣTukubaiͰLEFT JOIN૬ͷίϚϯυແ͍ɻ͔͠͠σʔλΛೖΕସ͑ͨΓ͢Ε࣮ݱՄೳɻid൪߸ͷআʹdelfίϚϯυΛར༻ɻ
͓2.3$ join -a 1 -a 2 -1 2 -2 1 -o 0,1.1,2.2 -e @ <(sort -k2,2 DEVICELIST)<(sort -k1,1 MEASUREMENT)xxxx.0148.3c58 12 @xxxx.02b6.4c24 20 @xxxx.04f8.c8e5 15 3120xxxx.0a69.b7b1 07 @xxxx.0c4d.09dc 06 @xxxx.0c4d.0c44 01 3618xxxx.0d17.7313 03 @xxxx.0d17.7477 10 @xxxx.0d17.9658 05 @xxxx.0d42.5cc4 17 @xxxx.0d46.33c3 11 2500xxxx.0d46.7eb6 16 3443xxxx.0d46.f3c3 02 3431xxxx.0d46.f3c6 13 3063xxxx.0d55.9b9f @ 3119xxxx.0d81.1c9c 14 2275xxxx.0d81.1dac 08 3382xxxx.0d81.33a8 04 2353xxxx.0fff.ce94 19 @xxxx.0fff.d828 09 3766xxxx.108b.cc1c 18 @xxxx.1301.bac4 @ 1607xxxx.13a2.8858 @ 3235xxxx.13aa.091e @ 3119xxxx.17d0.2c07 @ 1914શ݁߹ʢjoinίϚϯυͷ-aΦϓγϣϯΛ2ճར༻ʣ-eΦϓγϣϯͰิจࣈྻΛࢦఆՄೳ
͓2.3a$ loopj -d@ num=1 <(sort -s -k2,2 DEVICELIST | self 2 1) <(sort -k1,1MEASUREMENT)xxxx.0148.3c58 12 @xxxx.02b6.4c24 20 @xxxx.04f8.c8e5 15 3120xxxx.0a69.b7b1 07 @xxxx.0c4d.09dc 06 @xxxx.0c4d.0c44 01 3618xxxx.0d17.7313 03 @xxxx.0d17.7477 10 @xxxx.0d17.9658 05 @xxxx.0d42.5cc4 17 @xxxx.0d46.33c3 11 2500xxxx.0d46.7eb6 16 3443xxxx.0d46.f3c3 02 3431xxxx.0d46.f3c6 13 3063xxxx.0d55.9b9f @ 3119xxxx.0d81.1c9c 14 2275xxxx.0d81.1dac 08 3382xxxx.0d81.33a8 04 2353xxxx.0fff.ce94 19 @xxxx.0fff.d828 09 3766xxxx.108b.cc1c 18 @xxxx.1301.bac4 @ 1607xxxx.13a2.8858 @ 3235xxxx.13aa.091e @ 3119xxxx.17d0.2c07 @ 1914શ݁߹ʢTukubaiͷloopjίϚϯυΛར༻ʣ-dΦϓγϣϯͰิจࣈྻΛࢦఆՄೳ