Upgrade to Pro — share decks privately, control downloads, hide ads and more …

SystemV IPC

SystemV IPC

Masami Ichikawa

October 26, 2014
Tweet

More Decks by Masami Ichikawa

Other Decks in Programming

Transcript

  1. overview • libc側 • kernel側 • 主な構造体 • IPC全般の共通処理 •

    共通処理(shmget、semget、msgget共通部) • 共有メモリ
  2. overview • libc側 • kernel側 • 主な構造体 • IPC全般の共通処理 •

    共通処理(shmget、semget、msgget共通部) • 共有メモリ
  3. ipc()の呼び出し • sysdeps/unix/sysv/linux/shmat.c 41 resultvar = INTERNAL_SYSCALL (ipc,

    err, 5, IPCOP_shmat, 42 shmid, shmflg, 43 (long int) &raddr, 44 (void *) shmaddr); • sysdeps/unix/sysv/linux/x86_64/sysdep.h 220 # define INTERNAL_SYSCALL_NCS(name, err, nr, args...) \ 221 ({ \ 222 unsigned long int resultvar; \ 223 LOAD_ARGS_##nr (args) \ 224 LOAD_REGS_##nr \ 225 asm volatile ( \ 226 "syscall\n\t" \ 227 : "=a" (resultvar) \ 228 : "0" (name) ASM_ARGS_##nr : "memory", "cc", "r11", "cx"); \ 229 (long int) resultvar; }) 230 # undef INTERNAL_SYSCALL 231 # define INTERNAL_SYSCALL(name, err, nr, args...) \ 232 INTERNAL_SYSCALL_NCS (__NR_##name, err, nr, ##args)
  4. overview • libc側 • kernel側 • 主な構造体 • IPC全般の共通処理 •

    共通処理(shmget、semget、msgget共通部) • 共有メモリ
  5. 共有メモリ管理オブジェクト • include/linux/shm.h 9 struct shmid_kernel /* private to the

    kernel */ 10 { 11 struct kern_ipc_perm shm_perm; 12 struct file *shm_file; 13 unsigned long shm_nattch; 14 unsigned long shm_segsz; 15 time_t shm_atim; 16 time_t shm_dtim; 17 time_t shm_ctim; 18 pid_t shm_cprid; 19 pid_t shm_lprid; 20 struct user_struct *mlock_user; 21 22 /* The task created the shm object. NULL if the task is dead. */ 23 struct task_struct *shm_creator; 24 struct list_head shm_clist; /* list by creator */ 25 };
  6. セマフォ管理オブジェクト • include/linux/sem.h 12 struct sem_array { 13 struct kern_ipc_perm

    ____cacheline_aligned_in_smp 14 sem_perm; /* permissions .. see ipc.h */ 15 time_t sem_ctime; /* last change time */ 16 struct sem *sem_base; /* ptr to first semaphore in array */ 17 struct list_head pending_alter; /* pending operations */ 18 /* that alter the array */ 19 struct list_head pending_const; /* pending complex operations */ 20 /* that do not alter semvals */ 21 struct list_head list_id; /* undo requests on this array */ 22 int sem_nsems; /* no. of semaphores in array */ 23 int complex_count; /* pending complex operations */ 24 };
  7. message queue管理オブジェクト • include/linux/msg.h 18 struct msg_queue { 19 struct kern_ipc_perm

    q_perm; 20 time_t q_stime; /* last msgsnd time */ 21 time_t q_rtime; /* last msgrcv time */ 22 time_t q_ctime; /* last change time */ 23 unsigned long q_cbytes; /* current number of bytes on queue */ 24 unsigned long q_qnum; /* number of messages in queue */ 25 unsigned long q_qbytes; /* max number of bytes on queue */ 26 pid_t q_lspid; /* pid of last msgsnd */ 27 pid_t q_lrpid; /* last receive pid */ 28 29 struct list_head q_messages; 30 struct list_head q_receivers; 31 struct list_head q_senders; 32 };
  8. IPCパーミッション管理 • include/linux/ipc.h 11 struct kern_ipc_perm 12 { 13 spinlock_t

    lock; 14 bool deleted; 15 int id; 16 key_t key; 17 kuid_t uid; 18 kgid_t gid; 19 kuid_t cuid; 20 kgid_t cgid; 21 umode_t mode; 22 unsigned long seq; 23 void *security; 24 }; kuid_t・kgid_tはコンテナ型仮想化において、host側のuid/gidとguest側のuid/gidをマッピングするための型。 参照：http://www.slideshare.net/masamiichikawa/linux-namespace
  9. IPC Identifier • IPC Namespace単位で管理 • 管理する構造体はstruct ipc_ids • struct ipc_namespaceのメンバ変数ids[]にて管理 29 struct ipc_namespace

    { 30 atomic_t count; 31 struct ipc_ids ids[3]; • 初期化のタイミングはIPC namespace作成時の create_ipc_ns() • ipc_addid()にてidを設定
  10. struct ipc_ids • include/linux/ipc_namespace.h 21 struct ipc_ids { 22 int in_use;

    23 unsigned short seq; 24 struct rw_semaphore rwsem; 25 struct idr ipcs_idr; 26 int next_id; 27 }; IPC Object作成時に+1 IPC_RMIDの操作時に-1 通常は-1を設定し、この番号を使いたい！という場合にsysctlで設定可能。 カーネルのconfigでCONFIG_CHECKPOINT_RESTOREが設定されている必要あり。 https://github.com/torvalds/linux/commit/03f595668017f1a1fb971c02fc37140bc6e7bb1c idを振るときに使用
  11. ipc_addid • 関数プロトタイプ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new,

    int size) • IPC Identifierはnew->idに設定される • 戻り値のintはidr_alloc()の戻り値 • idr_alloc()の戻り値もidだけど、ipc_addid()の呼び出し元はこの変数はエラーチェックにしか使用しない
  12. ipc_addid() 処理概要 • IPC Identifierの使用数(in_use)のチェック • 最大で32768 • IDR APIによるidの取得

    • in_useをインクリメント • kern_ipc_perm構造体にeuid、egidを設定 • next_idの値に応じてseq変数を設定 • ipc_buildid()でidになる値を計算して返す
  13. 管理用オブジェクトのalloc • メモリ確保はすべてipc_rcu_alloc()にて実施 477 void *ipc_rcu_alloc(int size) 478 {

    479 /* 480 * We prepend the allocation with the rcu struct 481 */ 482 struct ipc_rcu *out = ipc_alloc(sizeof(struct ipc_rcu) + size); 483 if (unlikely(!out)) 484 return NULL; 485 atomic_set(&out->refcount, 1); 486 return out + 1; 487 }
  14. 機能固有処理の登録 • ipc/util.hにあるstruct ipc_opsに関数をセット 80 struct ipc_ops { 81 int (*getnew)(struct

    ipc_namespace *, struct ipc_params *); 82 int (*associate)(struct kern_ipc_perm *, int); 83 int (*more_checks)(struct kern_ipc_perm *, struct ipc_params *); 84 }; • getnew() • 新規にIPCのオブジェクトを作成 • associate() • パーミッションのチェック • more_checks() • その他のチェック • optional
  15. ipcget()の流れ ipcget() --> ipcget_new() // if key is IPC_PRIVATE

    --> ipcget_public() // if key is not IPC_PRIVATE --> ipc_findkey() // find key --> ipcget_new() // if key is not found --> ipc_check_perms() // if key is found --> (struct ipc_ops *)->more_checks() --> (struct ipc_ops *)->associate() ipcget_new() --> (struct ipc_ops *)->getnew()
  16. ipcget_public • keyがnamespace中にあるかチェック • keyが見つからなかったらipcget_new()を呼ぶ • keyが見つかった • flagのチェック

    • ipc_opsのmore_checksがセットされていればそれを呼ぶ • ipc_check_perms()でパーミッションのチェックとipc_opsのassociate()呼び出し • ipc_check_permsがシステムコール(xxxget)の戻り値となるidを返す
  17. overview • libc側 • kernel側 • 主な構造体 • IPC全般の共通処理 •

    共通処理(shmget、semget、msgget共通部) • 共有メモリ
  18. shmget • 共有メモリのセグメントを作成 • ipcget()からnewseg()が呼ばれる • struct ipc_opsのgetnewにnewseg()を設定

    • IPCの管理オブジェクトはstruct shmid_kernel • 作成したオブジェクトはcurrent->sysvshm.shm_clistにつながる • セグメントは擬似ファイルとして作る • SYSVxxxxxxxx(xはkey)というファイル名 • lsofすると以下のように表示される 41997 a.out 29678 root DEL REG 0,4 27394068 /SYSV00000000 • flagにSHM_HUGETLBによりファイルの作成方法が変わる • hugetlb_file_setup() or shmem_file_setup()
  19. セグメント作成の流れ • shmem_file_setup() • mm/shmem.cの__shmem_file_setup()が本体 __shmem_file_setup() -->

    mntget() // path to mount directory --> d_alloc_pseudo() // allocate a dentry --> shmem_get_inode() // get an inode --> alloc_file() // allocate a file object
  20. shmat • 実際の処理はdo_shmat() • 共有メモリ固有データ作成 • struct fileのprivate_dataへ設定する • ファイルオブジェクト

    (struct file)の作成 • ファイルはshmget()で作成した擬似ファイルを使用 • ファイルをmmap()でメモリにマップ • 使用するのはdo_mmap_pgoff() • do_mmap_pgoff()の返り値がshmat()の返り値になる
  21. セグメントattachの流れ do_shmat() --> path_get() // get a file path for

    shmem --> alloc_file() // allocate a file object --> do_mmap_pgoff() // map the file object --> shm_may_destroy() // check if another process is destroying this shmem --> shm_destroy() // another process is destroying this shmem --> shm_unlock() // no one is destroying this shmem
  22. 共有メモリ固有データ 50 struct shm_file_data { 51 int id; 52 struct

    ipc_namespace *ns; 53 struct file *file; 54 const struct vm_operations_struct *vm_ops; 55 }; • 以下のデータを設定 • idにはkey • nsにはカレントプロセスのipc namespace • ipc namespaceのリファレンスカウンタをインクリメントする • fileにはnewseg()で作成したファイルオブジェクト • vm_opsにはNULL
  23. セグメントdetachの流れ shmdt() --> find_vma() // find a vm_area_struct from address

    --> file_inode() // if vma is found, find an inode to get file(segment) size --> do_munmap() // unmap the address --> do_munmap()
  24. shmdt • 処理はCONFIG_MMUがdefineされているかで違うが、ここではdefineされている場合を見る • do_munmap()は複数回呼ばれる可能性がある • find_vma()でvmaが見つかった場合

    • unmap対象のアドレスを探し、unmap()する • これは1回だけ • vmaがNULLでなく、vma->vm_end - addrの結果がセグメントサイズより小さい間 • ここでは複数回do_munmap()を呼ぶ可能性がある
  25. セグメント破棄の流れ shmctl() --> shmctl_down() --> do_shm_rmid() --> shm_unlock() // someone is

    using this segment --> shm_destroy() // no one is using it --> shm_rmid() // remove id from namespace and segment's list --> shm_unlock() --> shmem_lock() // if shmflg is not SHM_HUGETLB --> user_shm_unlock() // else if (struct shmid_kernel *)->mlock_user isn't NULL --> fput() // remove file object
  26. shm_destroy • shm_rmid()を呼ぶ • namespaceからidの削除 • shm_clistのリストから管理オブジェクトを削除 • セグメントがhugetlbを使っていない場合

    • shmem_lock()でセグメントがswap禁止になっていたら許可するように設定 • もしくはmlockされている場合 • 共有メモリで使用するmlock対象のページ数をstruct user_structのlocked_shmから減らす • fput()でnewseg()で作成したセグメント用のファイルオブジェクトを削除