epoll.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. #ifndef DASYNQ_EPOLL_H_
  2. #define DASYNQ_EPOLL_H_
  3. #include <system_error>
  4. #include <mutex>
  5. #include <type_traits>
  6. #include <unordered_map>
  7. #include <vector>
  8. #include <sys/epoll.h>
  9. #include <sys/signalfd.h>
  10. #include <sys/types.h>
  11. #include <sys/wait.h>
  12. #include <unistd.h>
  13. #include <csignal>
  14. namespace dasynq {
  15. inline namespace v2 {
  namespace dprivate {
  class proc_status; // forward declaration
  }

  template <class Base> class epoll_loop;

  // Traits for the epoll-based event loop mechanism: the signal data type, the file descriptor
  // storage/reference helper types, and compile-time capability flags.
  class epoll_traits
  {
      template <class Base> friend class epoll_loop;

      public:

      // Signal information as read from a signalfd (wraps a signalfd_siginfo record). The record
      // itself is filled in by epoll_loop; all accessors are const so that they can be used through
      // a const reference.
      class sigdata_t
      {
          template <class Base> friend class epoll_loop;

          struct signalfd_siginfo info;

          public:
          // mandatory:
          int get_signo() const { return static_cast<int>(info.ssi_signo); }
          int get_sicode() const { return info.ssi_code; }
          pid_t get_sipid() const { return static_cast<pid_t>(info.ssi_pid); }
          uid_t get_siuid() const { return info.ssi_uid; }
          void * get_siaddr() const { return reinterpret_cast<void *>(info.ssi_addr); }
          int get_sistatus() const { return info.ssi_status; }
          int get_sival_int() const { return info.ssi_int; }
          void * get_sival_ptr() const { return reinterpret_cast<void *>(info.ssi_ptr); }

          // XSI
          int get_sierrno() const { return info.ssi_errno; }

          // XSR (streams) OB (obsolete)
          int get_siband() const { return static_cast<int>(info.ssi_band); }

          // Linux:
          int32_t get_sifd() const { return info.ssi_fd; }
          uint32_t get_sittimerid() const { return info.ssi_tid; }
          uint32_t get_sioverrun() const { return info.ssi_overrun; }
          uint32_t get_sitrapno() const { return info.ssi_trapno; }
          // Note: ssi_utime and ssi_stime are 64-bit fields in signalfd_siginfo; the values are
          // truncated to 32 bits here in order to keep the existing return type.
          uint32_t get_siutime() const { return static_cast<uint32_t>(info.ssi_utime); }
          uint32_t get_sistime() const { return static_cast<uint32_t>(info.ssi_stime); }

          // Field exposed by Linux kernel but not Glibc:
          // uint16_t get_siaddr_lsb() { return info.ssi_addr_lsb; }

          // Overwrite the signal number stored in the record.
          void set_signo(int signo) { info.ssi_signo = signo; }
      };

      using proc_status_t = dprivate::proc_status;

      class fd_r;

      // File descriptor optional storage. If the mechanism can return the file descriptor, this
      // class will be empty, otherwise it can hold a file descriptor.
      class fd_s {
          friend class fd_r;

          // Epoll doesn't return the file descriptor (it can, but it can't return both file
          // descriptor and user data).
          int fd;

          public:
          // Deliberately not explicit: existing callers may rely on implicit conversion from int.
          fd_s(int fd_p) noexcept : fd(fd_p) { }
      };

      // File descriptor reference (passed to event callback). If the mechanism can return the
      // file descriptor, this class holds the file descriptor. Otherwise, the file descriptor
      // must be stored in an fd_s instance.
      class fd_r {
          public:
          // Returns the file descriptor held by the given storage object.
          int get_fd(fd_s ss) const noexcept
          {
              return ss.fd;
          }
      };

      // Capability flags for this mechanism.
      constexpr static bool has_bidi_fd_watch = true;
      constexpr static bool has_separate_rw_fd_watches = false;
      constexpr static bool interrupt_after_fd_add = false;
      constexpr static bool interrupt_after_signal_add = false;
      constexpr static bool supports_non_oneshot_fd = true;
  };
  81. template <class Base> class epoll_loop : public Base
  82. {
  83. int epfd = -1; // epoll fd
  84. int sigfd = -1; // signalfd fd; -1 if not initialised
  85. sigset_t sigmask;
  86. std::unordered_map<int, void *> sigdataMap;
  87. // Base contains:
  88. // lock - a lock that can be used to protect internal structure.
  89. // receive*() methods will be called with lock held.
  90. // receive_signal(sigdata_t &, user *) noexcept
  91. // receive_fd_event(fd_r, user *, int flags) noexcept
  92. using sigdata_t = epoll_traits::sigdata_t;
  93. using fd_r = typename epoll_traits::fd_r;
  94. void process_events(epoll_event *events, int r)
  95. {
  96. std::lock_guard<decltype(Base::lock)> guard(Base::lock);
  97. for (int i = 0; i < r; i++) {
  98. void * ptr = events[i].data.ptr;
  99. if (ptr == &sigfd) {
  100. // Signal
  101. sigdata_t siginfo;
  102. while (true) {
  103. int r = read(sigfd, &siginfo.info, sizeof(siginfo.info));
  104. if (r == -1) break;
  105. auto iter = sigdataMap.find(siginfo.get_signo());
  106. if (iter != sigdataMap.end()) {
  107. void *userdata = (*iter).second;
  108. if (Base::receive_signal(*this, siginfo, userdata)) {
  109. sigdelset(&sigmask, siginfo.get_signo());
  110. }
  111. }
  112. }
  113. signalfd(sigfd, &sigmask, SFD_NONBLOCK | SFD_CLOEXEC);
  114. }
  115. else {
  116. int flags = 0;
  117. (events[i].events & EPOLLIN) && (flags |= IN_EVENTS);
  118. (events[i].events & EPOLLOUT) && (flags |= OUT_EVENTS);
  119. // We mustn't introduce IN/OUT events for error conditions as we don't know which are being
  120. // watched! Just set ERR_EVENTS.
  121. (events[i].events & EPOLLHUP) && (flags |= ERR_EVENTS);
  122. (events[i].events & EPOLLERR) && (flags |= ERR_EVENTS);
  123. auto r = Base::receive_fd_event(*this, fd_r(), ptr, flags);
  124. if (std::get<0>(r) != 0) {
  125. enable_fd_watch_nolock(fd_r().get_fd(std::get<1>(r)), ptr, std::get<0>(r));
  126. }
  127. }
  128. }
  129. }
  130. public:
  131. /**
  132. * epoll_loop constructor.
  133. *
  134. * Throws std::system_error or std::bad_alloc if the event loop cannot be initialised.
  135. */
  136. epoll_loop()
  137. {
  138. init();
  139. }
  140. epoll_loop(typename Base::delayed_init d) noexcept
  141. {
  142. // delayed initialisation
  143. }
  144. void init()
  145. {
  146. epfd = epoll_create1(EPOLL_CLOEXEC);
  147. if (epfd == -1) {
  148. throw std::system_error(errno, std::system_category());
  149. }
  150. sigemptyset(&sigmask);
  151. try {
  152. Base::init(this);
  153. }
  154. catch (...) {
  155. close(epfd);
  156. throw;
  157. }
  158. }
  159. ~epoll_loop() noexcept
  160. {
  161. if (epfd != -1) {
  162. Base::cleanup();
  163. close(epfd);
  164. if (sigfd != -1) {
  165. close(sigfd);
  166. }
  167. }
  168. }
  169. // fd: file descriptor to watch
  170. // userdata: data to associate with descriptor
  171. // flags: IN_EVENTS | OUT_EVENTS | ONE_SHOT
  172. // soft_fail: true if unsupported file descriptors should fail by returning false instead
  173. // of throwing an exception
  174. // returns: true on success; false if file descriptor type isn't supported and soft_fail == true
  175. // throws: std::system_error or std::bad_alloc on failure
  176. bool add_fd_watch(int fd, void *userdata, int flags, bool enabled = true, bool soft_fail = false)
  177. {
  178. struct epoll_event epevent;
  179. // epevent.data.fd = fd;
  180. epevent.data.ptr = userdata;
  181. epevent.events = 0;
  182. if (flags & ONE_SHOT) {
  183. epevent.events = EPOLLONESHOT;
  184. }
  185. if ((flags & IN_EVENTS) && enabled) {
  186. epevent.events |= EPOLLIN;
  187. }
  188. if ((flags & OUT_EVENTS) && enabled) {
  189. epevent.events |= EPOLLOUT;
  190. }
  191. if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &epevent) == -1) {
  192. if (soft_fail && errno == EPERM) {
  193. return false;
  194. }
  195. throw std::system_error(errno, std::system_category());
  196. }
  197. return true;
  198. }
  199. bool add_bidi_fd_watch(int fd, void *userdata, int flags, bool emulate)
  200. {
  201. // No implementation.
  202. throw std::system_error(std::make_error_code(std::errc::not_supported));
  203. }
  204. // flags specifies which watch to remove; ignored if the loop doesn't support
  205. // separate read/write watches.
  206. void remove_fd_watch(int fd, int flags) noexcept
  207. {
  208. epoll_ctl(epfd, EPOLL_CTL_DEL, fd, nullptr);
  209. }
  210. void remove_fd_watch_nolock(int fd, int flags) noexcept
  211. {
  212. remove_fd_watch(fd, flags);
  213. }
  214. void remove_bidi_fd_watch(int fd) noexcept
  215. {
  216. // Shouldn't be called for epoll.
  217. remove_fd_watch(fd, IN_EVENTS | OUT_EVENTS);
  218. }
  219. // Note this will *replace* the old flags with the new, that is,
  220. // it can enable *or disable* read/write events.
  221. void enable_fd_watch(int fd, void *userdata, int flags) noexcept
  222. {
  223. struct epoll_event epevent;
  224. // epevent.data.fd = fd;
  225. epevent.data.ptr = userdata;
  226. epevent.events = 0;
  227. if (flags & ONE_SHOT) {
  228. epevent.events = EPOLLONESHOT;
  229. }
  230. if (flags & IN_EVENTS) {
  231. epevent.events |= EPOLLIN;
  232. }
  233. if (flags & OUT_EVENTS) {
  234. epevent.events |= EPOLLOUT;
  235. }
  236. if (epoll_ctl(epfd, EPOLL_CTL_MOD, fd, &epevent) == -1) {
  237. // Shouldn't be able to fail
  238. // throw std::system_error(errno, std::system_category());
  239. }
  240. }
  241. void enable_fd_watch_nolock(int fd, void *userdata, int flags) noexcept
  242. {
  243. enable_fd_watch(fd, userdata, flags);
  244. }
  245. void disable_fd_watch(int fd, int flags) noexcept
  246. {
  247. struct epoll_event epevent;
  248. // epevent.data.fd = fd;
  249. epevent.data.ptr = nullptr;
  250. epevent.events = 0;
  251. // Epoll documentation says that hangup will still be reported, need to check
  252. // whether this is really the case. Suspect it is really only the case if
  253. // EPOLLIN is set.
  254. if (epoll_ctl(epfd, EPOLL_CTL_MOD, fd, &epevent) == -1) {
  255. // Let's assume that this can't fail.
  256. // throw std::system_error(errno, std::system_category());
  257. }
  258. }
  259. void disable_fd_watch_nolock(int fd, int flags) noexcept
  260. {
  261. disable_fd_watch(fd, flags);
  262. }
  263. // Note signal should be masked before call.
  264. void add_signal_watch(int signo, void *userdata)
  265. {
  266. std::lock_guard<decltype(Base::lock)> guard(Base::lock);
  267. add_signal_watch_nolock(signo, userdata);
  268. }
  269. // Note signal should be masked before call.
  270. void add_signal_watch_nolock(int signo, void *userdata)
  271. {
  272. sigdataMap[signo] = userdata;
  273. // Modify the signal fd to watch the new signal
  274. bool was_no_sigfd = (sigfd == -1);
  275. sigaddset(&sigmask, signo);
  276. sigfd = signalfd(sigfd, &sigmask, SFD_NONBLOCK | SFD_CLOEXEC);
  277. if (sigfd == -1) {
  278. throw std::system_error(errno, std::system_category());
  279. }
  280. if (was_no_sigfd) {
  281. // Add the signalfd to the epoll set.
  282. struct epoll_event epevent;
  283. epevent.data.ptr = &sigfd;
  284. epevent.events = EPOLLIN;
  285. // No need for EPOLLONESHOT - we can pull the signals out
  286. // as we see them.
  287. if (epoll_ctl(epfd, EPOLL_CTL_ADD, sigfd, &epevent) == -1) {
  288. close(sigfd);
  289. sigfd = -1;
  290. throw std::system_error(errno, std::system_category());
  291. }
  292. }
  293. }
  294. // Note, called with lock held:
  295. void rearm_signal_watch_nolock(int signo, void *userdata) noexcept
  296. {
  297. sigaddset(&sigmask, signo);
  298. signalfd(sigfd, &sigmask, SFD_NONBLOCK | SFD_CLOEXEC);
  299. }
  300. void remove_signal_watch_nolock(int signo) noexcept
  301. {
  302. sigdelset(&sigmask, signo);
  303. signalfd(sigfd, &sigmask, 0);
  304. }
  305. void remove_signal_watch(int signo) noexcept
  306. {
  307. std::lock_guard<decltype(Base::lock)> guard(Base::lock);
  308. remove_signal_watch_nolock(signo);
  309. }
  310. // If events are pending, process an unspecified number of them.
  311. // If no events are pending, wait until one event is received and
  312. // process this event (and possibly any other events received
  313. // simultaneously).
  314. // If processing an event removes a watch, there is a possibility
  315. // that the watched event will still be reported (if it has
  316. // occurred) before pull_events() returns.
  317. //
  318. // do_wait - if false, returns immediately if no events are
  319. // pending.
  320. void pull_events(bool do_wait)
  321. {
  322. epoll_event events[16];
  323. int r = epoll_wait(epfd, events, 16, do_wait ? -1 : 0);
  324. if (r == -1 || r == 0) {
  325. // signal or no events
  326. return;
  327. }
  328. do {
  329. process_events(events, r);
  330. r = epoll_wait(epfd, events, 16, 0);
  331. } while (r > 0);
  332. }
  333. };
  334. } // namespace v2
  335. } // namespace dasynq
  336. #endif /* DASYNQ_EPOLL_H_ */