workqueue: handle congestion by clearing queue
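
Bound the workqueue length: urcu_workqueue_init() now takes a maximum
queue length (0 meaning unbounded), and urcu_queue_work() returns
URCU_ENQUEUE_FULL instead of enqueueing once that limit is reached.
On congestion, a dispatcher can then clear the backlog with the new
urcu_workqueue_steal_all(), which grabs pending work from the global
queue as well as from every sibling worker.

A sketch of how a dispatcher might react to congestion follows; the
struct my_work type, dispatch_init(), dispatch() and process_work()
are hypothetical, and the 4096 bound and 0 worker-flags value are
arbitrary:

    static struct urcu_workqueue queue;
    static struct urcu_worker dispatch_worker;

    struct my_work {
        struct urcu_work w;     /* embedded work node */
        /* ... payload ... */
    };

    void dispatch_init(void)
    {
        urcu_workqueue_init(&queue, 4096, 0);       /* bound: 4096 items */
        urcu_worker_init(&queue, &dispatch_worker); /* never registered */
    }

    void dispatch(struct my_work *item)
    {
        if (urcu_queue_work(&queue, &item->w) == URCU_ENQUEUE_FULL) {
            struct urcu_work *work;

            /* Congested: steal the whole backlog, run it locally. */
            (void) urcu_workqueue_steal_all(&queue, &dispatch_worker);
            while ((work = urcu_dequeue_work(&dispatch_worker)) != NULL)
                process_work(caa_container_of(work, struct my_work, w));
            /* "item" was not enqueued; retry or process it here. */
        }
    }

Workers now reach the workqueue through struct urcu_worker:
urcu_worker_init() takes the workqueue, from which the worker inherits
its flags, and urcu_accept_work() only takes the worker. Also,
urcu_worker_unregister() now removes itself from the waitqueue by
waking all waiters, and only issues synchronize_rcu() after requeueing
leftover local work. A worker loop becomes the following sketch,
assuming the pre-existing urcu_worker_register() API:

    void *worker_thread(void *arg)
    {
        struct urcu_worker worker;

        urcu_worker_init(&queue, &worker);
        urcu_worker_register(&queue, &worker);
        while (urcu_accept_work(&worker) != URCU_ACCEPT_SHUTDOWN) {
            struct urcu_work *work;

            while ((work = urcu_dequeue_work(&worker)) != NULL)
                process_work(caa_container_of(work, struct my_work, w));
        }
        urcu_worker_unregister(&queue, &worker);
        return NULL;
    }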
diff --git a/urcu/workqueue-fifo.h b/urcu/workqueue-fifo.h
index 6256bff8218c87673fdfa865b39c4ec87ae6da83..ff0c650571a175ae6c809eee41d9f3135131ce36 100644
--- a/urcu/workqueue-fifo.h
+++ b/urcu/workqueue-fifo.h
@@ -36,6 +36,11 @@ enum urcu_accept_ret {
        URCU_ACCEPT_SHUTDOWN    = 1,
 };
 
+enum urcu_enqueue_ret {
+       URCU_ENQUEUE_OK         = 0,
+       URCU_ENQUEUE_FULL       = 1,
+};
+
 /*
  * We use RCU to steal work from siblings. Therefore, one of the RCU flavors
  * needs to be included before this header. All workers that participate
@@ -59,16 +64,26 @@ struct urcu_workqueue {
        struct cds_list_head sibling_head;
        pthread_mutex_t sibling_lock;   /* Protect sibling list updates */
 
+       /* Maximum number of work entries (approximate). 0 means infinite. */
+       unsigned long nr_work_max;
+       unsigned long nr_work;          /* Current number of work items */
+
+       int worker_flags;               /* Flags inherited by workers */
        bool shutdown;                  /* Shutdown performed */
 };
 
 struct urcu_worker {
+       /* Workqueue which can either be used by the worker, or stolen. */
        struct cds_wfcq_head head;
        struct cds_wfcq_tail tail;
 
+       /* Work belonging to worker. Cannot be stolen. */
+       struct urcu_work *own;
+
        struct urcu_wait_node wait_node;
        /* RCU linked list node of siblings for work stealing. */
        struct cds_list_head sibling_node;
+       struct urcu_workqueue *queue;
        int flags;      /* enum urcu_worker_flags */
 };
 
@@ -77,19 +92,34 @@ enum urcu_worker_flags {
 };
 
 static inline
-void urcu_workqueue_init(struct urcu_workqueue *queue)
+void urcu_workqueue_init(struct urcu_workqueue *queue,
+               unsigned long max_queue_len,
+               int worker_flags)
 {
        __cds_wfcq_init(&queue->head, &queue->tail);
        urcu_wait_queue_init(&queue->waitqueue);
        CDS_INIT_LIST_HEAD(&queue->sibling_head);
+       pthread_mutex_init(&queue->sibling_lock, NULL);
+       queue->nr_work_max = max_queue_len;
+       queue->nr_work = 0;
+       queue->worker_flags = worker_flags;
        queue->shutdown = false;
 }
 
 static inline
-void urcu_queue_work(struct urcu_workqueue *queue, struct urcu_work *work)
+enum urcu_enqueue_ret urcu_queue_work(struct urcu_workqueue *queue,
+               struct urcu_work *work)
 {
        bool was_empty;
-
+       unsigned long nr_work_max;
+
+       nr_work_max = queue->nr_work_max;
+       if (nr_work_max) {
+               /* Approximate limit: check and increment are not atomic. */
+               if (uatomic_read(&queue->nr_work) >= nr_work_max)
+                       return URCU_ENQUEUE_FULL;
+               uatomic_inc(&queue->nr_work);
+       }
        cds_wfcq_node_init(&work->node);
 
        /* Enqueue work. */
@@ -113,6 +143,7 @@ void urcu_queue_work(struct urcu_workqueue *queue, struct urcu_work *work)
                (void) urcu_dequeue_wake_single(&queue->waitqueue);
                rcu_read_unlock();      /* Protect stack dequeue */
        }
+       return URCU_ENQUEUE_OK;
 }
 
 static inline
@@ -128,11 +159,15 @@ void __urcu_workqueue_wakeup_all(struct urcu_workqueue *queue)
 }
 
 static inline
-void urcu_worker_init(struct urcu_worker *worker, int flags)
+void urcu_worker_init(struct urcu_workqueue *queue,
+               struct urcu_worker *worker)
 {
        cds_wfcq_init(&worker->head, &worker->tail);
-       worker->flags = flags;
        urcu_wait_node_init(&worker->wait_node, URCU_WAIT_RUNNING);
+       worker->own = NULL;
+       worker->wait_node.node.next = NULL;     /* NULL: not on waitqueue */
+       worker->queue = queue;
+       worker->flags = queue->worker_flags;
 }
 
 static inline
@@ -159,13 +194,12 @@ void urcu_worker_unregister(struct urcu_workqueue *queue,
        }
 
        /*
-        * Wait for grace period before freeing or reusing
-        * "worker" because used by RCU linked list.
-        * Also prevents ABA for waitqueue stack dequeue: matches RCU
-        * read-side critical sections around dequeue and move all
-        * operations on waitqueue).
+        * Make sure we are removed from the waitqueue: if our wait
+        * node is still linked, waking all waiters empties the
+        * waitqueue stack, which unlinks our node.
         */
-       synchronize_rcu();
+       if (CMM_LOAD_SHARED(worker->wait_node.node.next))
+               __urcu_workqueue_wakeup_all(queue);
 
        /*
         * Put any local work we still have back into the workqueue.
@@ -184,29 +218,91 @@ void urcu_worker_unregister(struct urcu_workqueue *queue,
                (void) urcu_dequeue_wake_single(&queue->waitqueue);
                rcu_read_unlock();      /* Protect stack dequeue */
        }
+
+       /*
+        * Wait for grace period before freeing or reusing
+        * "worker" because used by RCU linked list.
+        * Also prevents ABA for waitqueue stack dequeue: matches RCU
+        * read-side critical sections around dequeue and move all
+        * operations on waitqueue).
+        */
+       synchronize_rcu();
 }
 
-/*
- * Try stealing work from siblings when we have nothing to do.
- */
 static inline
-bool ___urcu_steal_work(struct urcu_worker *worker,
-               struct urcu_worker *sibling)
+bool ___urcu_grab_work(struct urcu_worker *worker,
+               cds_wfcq_head_ptr_t src_head,
+               struct cds_wfcq_tail *src_tail,
+               bool steal)
 {
        enum cds_wfcq_ret splice_ret;
+       struct __cds_wfcq_head tmp_head;
+       struct cds_wfcq_tail tmp_tail;
+       struct cds_wfcq_node *node;
 
        /*
-        * Don't bother grabbing the sibling queue lock if it is empty.
+        * Don't bother grabbing the src queue lock if it is empty.
         */
-       if (cds_wfcq_empty(&sibling->head, &sibling->tail))
+       if (cds_wfcq_empty(src_head, src_tail))
                return false;
-       splice_ret = cds_wfcq_splice_blocking(&worker->head,
+       __cds_wfcq_init(&tmp_head, &tmp_tail);
+
+       /* Ensure that we preserve FIFO work order. */
+       assert(!steal || worker->own == NULL);
+
+       /* Splice to temporary queue. */
+       if (steal)
+               cds_wfcq_dequeue_lock(src_head.h, src_tail);
+       splice_ret = __cds_wfcq_splice_blocking(&tmp_head,
+                       &tmp_tail,
+                       src_head,
+                       src_tail);
+       if (steal)
+               cds_wfcq_dequeue_unlock(src_head.h, src_tail);
+       if (splice_ret == CDS_WFCQ_RET_SRC_EMPTY)
+               return false;
+
+       /*
+        * Keep one work entry for ourselves. This ensures forward
+        * progress amongst stealing co-workers. This also ensures
+        * that when a worker grabs work from the global workqueue,
+        * it has at least one work item to deal with.
+        */
+       if (worker->own == NULL) {
+               if (!steal) {
+                       /*
+                        * Try to grab own work from worker workqueue to
+                        * preserve FIFO order.
+                        */
+                       node = cds_wfcq_dequeue_blocking(&worker->head,
+                               &worker->tail);
+                       if (node)
+                               goto got_node;
+               }
+               node = __cds_wfcq_dequeue_blocking(&tmp_head, &tmp_tail);
+               assert(node != NULL);
+got_node:
+               worker->own = caa_container_of(node, struct urcu_work, node);
+       }
+
+       /* Splice into worker workqueue. */
+       splice_ret = __cds_wfcq_splice_blocking(&worker->head,
                        &worker->tail,
-                       &sibling->head,
-                       &sibling->tail);
+                       &tmp_head,
+                       &tmp_tail);
        /* Ensure that we preserve FIFO work order. */
-       assert(splice_ret != CDS_WFCQ_RET_DEST_NON_EMPTY);
-       return splice_ret != CDS_WFCQ_RET_SRC_EMPTY;
+       assert(!steal || splice_ret != CDS_WFCQ_RET_DEST_NON_EMPTY);
+       return true;
+}
+
+/*
+ * Try stealing work from siblings when we have nothing to do.
+ */
+static inline
+bool ___urcu_steal_work(struct urcu_worker *worker,
+               struct urcu_worker *sibling)
+{
+       return ___urcu_grab_work(worker, &sibling->head, &sibling->tail, 1);
 }
 
 static inline
@@ -292,19 +388,16 @@ end:
 }
 
 static inline
-enum urcu_accept_ret urcu_accept_work(struct urcu_workqueue *queue,
-               struct urcu_worker *worker)
+enum urcu_accept_ret urcu_accept_work(struct urcu_worker *worker)
 {
+       struct urcu_workqueue *queue = worker->queue;
        enum cds_wfcq_ret wfcq_ret;
+       bool has_work;
 
-       wfcq_ret = __cds_wfcq_splice_blocking(&worker->head,
-                       &worker->tail,
-                       &queue->head,
-                       &queue->tail);
+       has_work = ___urcu_grab_work(worker, &queue->head, &queue->tail, 0);
        /* Don't wait if we have work to do. */
-       if (wfcq_ret != CDS_WFCQ_RET_SRC_EMPTY
-                       || !cds_wfcq_empty(&worker->head,
-                               &worker->tail))
+       if (has_work || worker->own
+                       || !cds_wfcq_empty(&worker->head, &worker->tail))
                goto do_work;
        /* Try to steal work from sibling instead of blocking */
        if (__urcu_steal_work(queue, worker))
@@ -367,8 +460,16 @@ do_work:
 static inline
 struct urcu_work *urcu_dequeue_work(struct urcu_worker *worker)
 {
+       struct urcu_workqueue *queue = worker->queue;
        struct cds_wfcq_node *node;
+       struct urcu_work *work;
 
+       if (worker->own) {
+               /* Process our own work entry. */
+               work = worker->own;
+               worker->own = NULL;
+               goto end;
+       }
        /*
         * If we are registered for work stealing, we need to dequeue
         * safely against siblings.
@@ -388,7 +489,11 @@ struct urcu_work *urcu_dequeue_work(struct urcu_worker *worker)
        }
        if (!node)
                return NULL;
-       return caa_container_of(node, struct urcu_work, node);
+       work = caa_container_of(node, struct urcu_work, node);
+end:
+       if (queue->nr_work_max)
+               uatomic_dec(&queue->nr_work);
+       return work;
 }
 
 static inline
@@ -400,4 +505,29 @@ void urcu_workqueue_shutdown(struct urcu_workqueue *queue)
        __urcu_workqueue_wakeup_all(queue);
 }
 
+/*
+ * Used to let the dispatcher steal work from the entire queue in case
+ * of a stall. The "worker" parameter needs to be initialized, but is
+ * usually not registered.
+ */
+static inline
+bool urcu_workqueue_steal_all(struct urcu_workqueue *queue,
+               struct urcu_worker *worker)
+{
+       struct urcu_worker *sibling;
+       bool has_work = false;
+
+       rcu_read_lock();
+       /* Steal from each worker */
+       cds_list_for_each_entry_rcu(sibling, &queue->sibling_head,
+                       sibling_node)
+               has_work |= ___urcu_grab_work(worker, &sibling->head,
+                                       &sibling->tail, 1);
+       rcu_read_unlock();
+
+       /* Steal from global workqueue */
+       has_work |= ___urcu_grab_work(worker, &queue->head, &queue->tail, 0);
+       return has_work;
+}
+
 #endif /* _URCU_WORKQUEUE_FIFO_H */