Merge 4.19.223 into android-4.19-stable
Changes in 4.19.223 net: usb: lan78xx: add Allied Telesis AT29M2-AF block, bfq: improve asymmetric scenarios detection block, bfq: fix asymmetric scenarios detection block, bfq: fix decrement of num_active_groups block, bfq: fix queue removal from weights tree block, bfq: fix use after free in bfq_bfqq_expire HID: holtek: fix mouse probing arm64: dts: allwinner: orangepi-zero-plus: fix PHY mode spi: change clk_disable_unprepare to clk_unprepare IB/qib: Fix memory leak in qib_user_sdma_queue_pkts() netfilter: fix regression in looped (broad|multi)cast's MAC handling qlcnic: potential dereference null pointer of rx_queue->page_ring net: accept UFOv6 packages in virtio_net_hdr_to_skb net: skip virtio_net_hdr_set_proto if protocol already set ipmi: Fix UAF when uninstall ipmi_si and ipmi_msghandler module bonding: fix ad_actor_system option setting to default fjes: Check for error irq drivers: net: smc911x: Check for error irq sfc: falcon: Check null pointer of rx_queue->page_ring hwmon: (lm90) Fix usage of CONFIG2 register in detect function ALSA: jack: Check the return value of kstrdup() ALSA: drivers: opl3: Fix incorrect use of vp->state Input: atmel_mxt_ts - fix double free in mxt_read_info_block ipmi: bail out if init_srcu_struct fails ipmi: fix initialization when workqueue allocation fails parisc: Correct completer in lws start x86/pkey: Fix undefined behaviour with PKRU_WD_BIT pinctrl: stm32: consider the GPIO offset to expose all the GPIO lines ARM: 9169/1: entry: fix Thumb2 bug in iWMMXt exception handling f2fs: fix to do sanity check on last xattr entry in __f2fs_setxattr() usb: gadget: u_ether: fix race in setting MAC address in setup phase KVM: VMX: Fix stale docs for kvm-intel.emulate_invalid_guest_state hwmon: (lm90) Do not report 'busy' status bit as alarm ax25: NPD bug when detaching AX25 device hamradio: defer ax25 kfree after unregister_netdev hamradio: improve the incomplete fix to avoid NPD phonet/pep: refuse to enable an unbound pipe Linux 4.19.223 Signed-off-by: Greg Kroah-Hartman <gregkh@google.com> Change-Id: I852f8dadac2af1140ce6c0692ad03942fb076b95
This commit is contained in:
@@ -625,12 +625,13 @@ void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq)
|
||||
}
|
||||
|
||||
/*
|
||||
* Tell whether there are active queues or groups with differentiated weights.
|
||||
* Tell whether there are active queues with different weights or
|
||||
* active groups.
|
||||
*/
|
||||
static bool bfq_differentiated_weights(struct bfq_data *bfqd)
|
||||
static bool bfq_varied_queue_weights_or_active_groups(struct bfq_data *bfqd)
|
||||
{
|
||||
/*
|
||||
* For weights to differ, at least one of the trees must contain
|
||||
* For queue weights to differ, queue_weights_tree must contain
|
||||
* at least two nodes.
|
||||
*/
|
||||
return (!RB_EMPTY_ROOT(&bfqd->queue_weights_tree) &&
|
||||
@@ -638,9 +639,7 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd)
|
||||
bfqd->queue_weights_tree.rb_node->rb_right)
|
||||
#ifdef CONFIG_BFQ_GROUP_IOSCHED
|
||||
) ||
|
||||
(!RB_EMPTY_ROOT(&bfqd->group_weights_tree) &&
|
||||
(bfqd->group_weights_tree.rb_node->rb_left ||
|
||||
bfqd->group_weights_tree.rb_node->rb_right)
|
||||
(bfqd->num_groups_with_pending_reqs > 0
|
||||
#endif
|
||||
);
|
||||
}
|
||||
@@ -658,26 +657,25 @@ static bool bfq_differentiated_weights(struct bfq_data *bfqd)
|
||||
* 3) all active groups at the same level in the groups tree have the same
|
||||
* number of children.
|
||||
*
|
||||
* Unfortunately, keeping the necessary state for evaluating exactly the
|
||||
* above symmetry conditions would be quite complex and time-consuming.
|
||||
* Therefore this function evaluates, instead, the following stronger
|
||||
* sub-conditions, for which it is much easier to maintain the needed
|
||||
* state:
|
||||
* Unfortunately, keeping the necessary state for evaluating exactly
|
||||
* the last two symmetry sub-conditions above would be quite complex
|
||||
* and time consuming. Therefore this function evaluates, instead,
|
||||
* only the following stronger two sub-conditions, for which it is
|
||||
* much easier to maintain the needed state:
|
||||
* 1) all active queues have the same weight,
|
||||
* 2) all active groups have the same weight,
|
||||
* 3) all active groups have at most one active child each.
|
||||
* In particular, the last two conditions are always true if hierarchical
|
||||
* support and the cgroups interface are not enabled, thus no state needs
|
||||
* to be maintained in this case.
|
||||
* 2) there are no active groups.
|
||||
* In particular, the last condition is always true if hierarchical
|
||||
* support or the cgroups interface are not enabled, thus no state
|
||||
* needs to be maintained in this case.
|
||||
*/
|
||||
static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
|
||||
{
|
||||
return !bfq_differentiated_weights(bfqd);
|
||||
return !bfq_varied_queue_weights_or_active_groups(bfqd);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the weight-counter tree passed as input contains no counter for
|
||||
* the weight of the input entity, then add that counter; otherwise just
|
||||
* the weight of the input queue, then add that counter; otherwise just
|
||||
* increment the existing counter.
|
||||
*
|
||||
* Note that weight-counter trees contain few nodes in mostly symmetric
|
||||
@@ -688,25 +686,25 @@ static bool bfq_symmetric_scenario(struct bfq_data *bfqd)
|
||||
* In most scenarios, the rate at which nodes are created/destroyed
|
||||
* should be low too.
|
||||
*/
|
||||
void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
|
||||
void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
struct rb_root *root)
|
||||
{
|
||||
struct bfq_entity *entity = &bfqq->entity;
|
||||
struct rb_node **new = &(root->rb_node), *parent = NULL;
|
||||
|
||||
/*
|
||||
* Do not insert if the entity is already associated with a
|
||||
* Do not insert if the queue is already associated with a
|
||||
* counter, which happens if:
|
||||
* 1) the entity is associated with a queue,
|
||||
* 2) a request arrival has caused the queue to become both
|
||||
* 1) a request arrival has caused the queue to become both
|
||||
* non-weight-raised, and hence change its weight, and
|
||||
* backlogged; in this respect, each of the two events
|
||||
* causes an invocation of this function,
|
||||
* 3) this is the invocation of this function caused by the
|
||||
* 2) this is the invocation of this function caused by the
|
||||
* second event. This second invocation is actually useless,
|
||||
* and we handle this fact by exiting immediately. More
|
||||
* efficient or clearer solutions might possibly be adopted.
|
||||
*/
|
||||
if (entity->weight_counter)
|
||||
if (bfqq->weight_counter)
|
||||
return;
|
||||
|
||||
while (*new) {
|
||||
@@ -716,7 +714,7 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
|
||||
parent = *new;
|
||||
|
||||
if (entity->weight == __counter->weight) {
|
||||
entity->weight_counter = __counter;
|
||||
bfqq->weight_counter = __counter;
|
||||
goto inc_counter;
|
||||
}
|
||||
if (entity->weight < __counter->weight)
|
||||
@@ -725,68 +723,68 @@ void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
|
||||
new = &((*new)->rb_right);
|
||||
}
|
||||
|
||||
entity->weight_counter = kzalloc(sizeof(struct bfq_weight_counter),
|
||||
GFP_ATOMIC);
|
||||
bfqq->weight_counter = kzalloc(sizeof(struct bfq_weight_counter),
|
||||
GFP_ATOMIC);
|
||||
|
||||
/*
|
||||
* In the unlucky event of an allocation failure, we just
|
||||
* exit. This will cause the weight of entity to not be
|
||||
* considered in bfq_differentiated_weights, which, in its
|
||||
* turn, causes the scenario to be deemed wrongly symmetric in
|
||||
* case entity's weight would have been the only weight making
|
||||
* the scenario asymmetric. On the bright side, no unbalance
|
||||
* will however occur when entity becomes inactive again (the
|
||||
* invocation of this function is triggered by an activation
|
||||
* of entity). In fact, bfq_weights_tree_remove does nothing
|
||||
* if !entity->weight_counter.
|
||||
* exit. This will cause the weight of queue to not be
|
||||
* considered in bfq_varied_queue_weights_or_active_groups,
|
||||
* which, in its turn, causes the scenario to be deemed
|
||||
* wrongly symmetric in case bfqq's weight would have been
|
||||
* the only weight making the scenario asymmetric. On the
|
||||
* bright side, no unbalance will however occur when bfqq
|
||||
* becomes inactive again (the invocation of this function
|
||||
* is triggered by an activation of queue). In fact,
|
||||
* bfq_weights_tree_remove does nothing if
|
||||
* !bfqq->weight_counter.
|
||||
*/
|
||||
if (unlikely(!entity->weight_counter))
|
||||
if (unlikely(!bfqq->weight_counter))
|
||||
return;
|
||||
|
||||
entity->weight_counter->weight = entity->weight;
|
||||
rb_link_node(&entity->weight_counter->weights_node, parent, new);
|
||||
rb_insert_color(&entity->weight_counter->weights_node, root);
|
||||
bfqq->weight_counter->weight = entity->weight;
|
||||
rb_link_node(&bfqq->weight_counter->weights_node, parent, new);
|
||||
rb_insert_color(&bfqq->weight_counter->weights_node, root);
|
||||
|
||||
inc_counter:
|
||||
entity->weight_counter->num_active++;
|
||||
bfqq->weight_counter->num_active++;
|
||||
bfqq->ref++;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decrement the weight counter associated with the entity, and, if the
|
||||
* Decrement the weight counter associated with the queue, and, if the
|
||||
* counter reaches 0, remove the counter from the tree.
|
||||
* See the comments to the function bfq_weights_tree_add() for considerations
|
||||
* about overhead.
|
||||
*/
|
||||
void __bfq_weights_tree_remove(struct bfq_data *bfqd,
|
||||
struct bfq_entity *entity,
|
||||
struct bfq_queue *bfqq,
|
||||
struct rb_root *root)
|
||||
{
|
||||
if (!entity->weight_counter)
|
||||
if (!bfqq->weight_counter)
|
||||
return;
|
||||
|
||||
entity->weight_counter->num_active--;
|
||||
if (entity->weight_counter->num_active > 0)
|
||||
bfqq->weight_counter->num_active--;
|
||||
if (bfqq->weight_counter->num_active > 0)
|
||||
goto reset_entity_pointer;
|
||||
|
||||
rb_erase(&entity->weight_counter->weights_node, root);
|
||||
kfree(entity->weight_counter);
|
||||
rb_erase(&bfqq->weight_counter->weights_node, root);
|
||||
kfree(bfqq->weight_counter);
|
||||
|
||||
reset_entity_pointer:
|
||||
entity->weight_counter = NULL;
|
||||
bfqq->weight_counter = NULL;
|
||||
bfq_put_queue(bfqq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Invoke __bfq_weights_tree_remove on bfqq and all its inactive
|
||||
* parent entities.
|
||||
* Invoke __bfq_weights_tree_remove on bfqq and decrement the number
|
||||
* of active groups for each queue's inactive parent entity.
|
||||
*/
|
||||
void bfq_weights_tree_remove(struct bfq_data *bfqd,
|
||||
struct bfq_queue *bfqq)
|
||||
{
|
||||
struct bfq_entity *entity = bfqq->entity.parent;
|
||||
|
||||
__bfq_weights_tree_remove(bfqd, &bfqq->entity,
|
||||
&bfqd->queue_weights_tree);
|
||||
|
||||
for_each_entity(entity) {
|
||||
struct bfq_sched_data *sd = entity->my_sched_data;
|
||||
|
||||
@@ -798,18 +796,37 @@ void bfq_weights_tree_remove(struct bfq_data *bfqd,
|
||||
* next_in_service for details on why
|
||||
* in_service_entity must be checked too).
|
||||
*
|
||||
* As a consequence, the weight of entity is
|
||||
* not to be removed. In addition, if entity
|
||||
* is active, then its parent entities are
|
||||
* active as well, and thus their weights are
|
||||
* not to be removed either. In the end, this
|
||||
* loop must stop here.
|
||||
* As a consequence, its parent entities are
|
||||
* active as well, and thus this loop must
|
||||
* stop here.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
__bfq_weights_tree_remove(bfqd, entity,
|
||||
&bfqd->group_weights_tree);
|
||||
|
||||
/*
|
||||
* The decrement of num_groups_with_pending_reqs is
|
||||
* not performed immediately upon the deactivation of
|
||||
* entity, but it is delayed to when it also happens
|
||||
* that the first leaf descendant bfqq of entity gets
|
||||
* all its pending requests completed. The following
|
||||
* instructions perform this delayed decrement, if
|
||||
* needed. See the comments on
|
||||
* num_groups_with_pending_reqs for details.
|
||||
*/
|
||||
if (entity->in_groups_with_pending_reqs) {
|
||||
entity->in_groups_with_pending_reqs = false;
|
||||
bfqd->num_groups_with_pending_reqs--;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Next function is invoked last, because it causes bfqq to be
|
||||
* freed if the following holds: bfqq is not in service and
|
||||
* has no dispatched request. DO NOT use bfqq after the next
|
||||
* function invocation.
|
||||
*/
|
||||
__bfq_weights_tree_remove(bfqd, bfqq,
|
||||
&bfqd->queue_weights_tree);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1003,7 +1020,8 @@ bfq_bfqq_resume_state(struct bfq_queue *bfqq, struct bfq_data *bfqd,
|
||||
|
||||
static int bfqq_process_refs(struct bfq_queue *bfqq)
|
||||
{
|
||||
return bfqq->ref - bfqq->allocated - bfqq->entity.on_st;
|
||||
return bfqq->ref - bfqq->allocated - bfqq->entity.on_st -
|
||||
(bfqq->weight_counter != NULL);
|
||||
}
|
||||
|
||||
/* Empty burst list and add just bfqq (see comments on bfq_handle_burst) */
|
||||
@@ -2798,7 +2816,7 @@ static void bfq_dispatch_remove(struct request_queue *q, struct request *rq)
|
||||
bfq_remove_request(q, rq);
|
||||
}
|
||||
|
||||
static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
|
||||
static bool __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
|
||||
{
|
||||
/*
|
||||
* If this bfqq is shared between multiple processes, check
|
||||
@@ -2831,9 +2849,11 @@ static void __bfq_bfqq_expire(struct bfq_data *bfqd, struct bfq_queue *bfqq)
|
||||
/*
|
||||
* All in-service entities must have been properly deactivated
|
||||
* or requeued before executing the next function, which
|
||||
* resets all in-service entites as no more in service.
|
||||
* resets all in-service entities as no more in service. This
|
||||
* may cause bfqq to be freed. If this happens, the next
|
||||
* function returns true.
|
||||
*/
|
||||
__bfq_bfqd_reset_in_service(bfqd);
|
||||
return __bfq_bfqd_reset_in_service(bfqd);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -3238,7 +3258,6 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
|
||||
bool slow;
|
||||
unsigned long delta = 0;
|
||||
struct bfq_entity *entity = &bfqq->entity;
|
||||
int ref;
|
||||
|
||||
/*
|
||||
* Check whether the process is slow (see bfq_bfqq_is_slow).
|
||||
@@ -3307,10 +3326,8 @@ void bfq_bfqq_expire(struct bfq_data *bfqd,
|
||||
* reason.
|
||||
*/
|
||||
__bfq_bfqq_recalc_budget(bfqd, bfqq, reason);
|
||||
ref = bfqq->ref;
|
||||
__bfq_bfqq_expire(bfqd, bfqq);
|
||||
|
||||
if (ref == 1) /* bfqq is gone, no more actions on it */
|
||||
if (__bfq_bfqq_expire(bfqd, bfqq))
|
||||
/* bfqq is gone, no more actions on it */
|
||||
return;
|
||||
|
||||
bfqq->injected_service = 0;
|
||||
@@ -3521,9 +3538,11 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
|
||||
* symmetric scenario where:
|
||||
* (i) each of these processes must get the same throughput as
|
||||
* the others;
|
||||
* (ii) all these processes have the same I/O pattern
|
||||
(either sequential or random).
|
||||
* In fact, in such a scenario, the drive will tend to treat
|
||||
* (ii) the I/O of each process has the same properties, in
|
||||
* terms of locality (sequential or random), direction
|
||||
* (reads or writes), request sizes, greediness
|
||||
* (from I/O-bound to sporadic), and so on.
|
||||
* In fact, in such a scenario, the drive tends to treat
|
||||
* the requests of each of these processes in about the same
|
||||
* way as the requests of the others, and thus to provide
|
||||
* each of these processes with about the same throughput
|
||||
@@ -3532,18 +3551,67 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
|
||||
* certainly needed to guarantee that bfqq receives its
|
||||
* assigned fraction of the device throughput (see [1] for
|
||||
* details).
|
||||
* The problem is that idling may significantly reduce
|
||||
* throughput with certain combinations of types of I/O and
|
||||
* devices. An important example is sync random I/O, on flash
|
||||
* storage with command queueing. So, unless bfqq falls in the
|
||||
* above cases where idling also boosts throughput, it would
|
||||
* be important to check conditions (i) and (ii) accurately,
|
||||
* so as to avoid idling when not strictly needed for service
|
||||
* guarantees.
|
||||
*
|
||||
* We address this issue by controlling, actually, only the
|
||||
* symmetry sub-condition (i), i.e., provided that
|
||||
* sub-condition (i) holds, idling is not performed,
|
||||
* regardless of whether sub-condition (ii) holds. In other
|
||||
* words, only if sub-condition (i) holds, then idling is
|
||||
* allowed, and the device tends to be prevented from queueing
|
||||
* many requests, possibly of several processes. The reason
|
||||
* for not controlling also sub-condition (ii) is that we
|
||||
* exploit preemption to preserve guarantees in case of
|
||||
* symmetric scenarios, even if (ii) does not hold, as
|
||||
* explained in the next two paragraphs.
|
||||
* Unfortunately, it is extremely difficult to thoroughly
|
||||
* check condition (ii). And, in case there are active groups,
|
||||
* it becomes very difficult to check condition (i) too. In
|
||||
* fact, if there are active groups, then, for condition (i)
|
||||
* to become false, it is enough that an active group contains
|
||||
* more active processes or sub-groups than some other active
|
||||
* group. More precisely, for condition (i) to hold because of
|
||||
* such a group, it is not even necessary that the group is
|
||||
* (still) active: it is sufficient that, even if the group
|
||||
* has become inactive, some of its descendant processes still
|
||||
* have some request already dispatched but still waiting for
|
||||
* completion. In fact, requests have still to be guaranteed
|
||||
* their share of the throughput even after being
|
||||
* dispatched. In this respect, it is easy to show that, if a
|
||||
* group frequently becomes inactive while still having
|
||||
* in-flight requests, and if, when this happens, the group is
|
||||
* not considered in the calculation of whether the scenario
|
||||
* is asymmetric, then the group may fail to be guaranteed its
|
||||
* fair share of the throughput (basically because idling may
|
||||
* not be performed for the descendant processes of the group,
|
||||
* but it had to be). We address this issue with the
|
||||
* following bi-modal behavior, implemented in the function
|
||||
* bfq_symmetric_scenario().
|
||||
*
|
||||
* If there are groups with requests waiting for completion
|
||||
* (as commented above, some of these groups may even be
|
||||
* already inactive), then the scenario is tagged as
|
||||
* asymmetric, conservatively, without checking any of the
|
||||
* conditions (i) and (ii). So the device is idled for bfqq.
|
||||
* This behavior matches also the fact that groups are created
|
||||
* exactly if controlling I/O is a primary concern (to
|
||||
* preserve bandwidth and latency guarantees).
|
||||
*
|
||||
* On the opposite end, if there are no groups with requests
|
||||
* waiting for completion, then only condition (i) is actually
|
||||
* controlled, i.e., provided that condition (i) holds, idling
|
||||
* is not performed, regardless of whether condition (ii)
|
||||
* holds. In other words, only if condition (i) does not hold,
|
||||
* then idling is allowed, and the device tends to be
|
||||
* prevented from queueing many requests, possibly of several
|
||||
* processes. Since there are no groups with requests waiting
|
||||
* for completion, then, to control condition (i) it is enough
|
||||
* to check just whether all the queues with requests waiting
|
||||
* for completion also have the same weight.
|
||||
*
|
||||
* Not checking condition (ii) evidently exposes bfqq to the
|
||||
* risk of getting less throughput than its fair share.
|
||||
* However, for queues with the same weight, a further
|
||||
* mechanism, preemption, mitigates or even eliminates this
|
||||
* problem. And it does so without consequences on overall
|
||||
* throughput. This mechanism and its benefits are explained
|
||||
* in the next three paragraphs.
|
||||
*
|
||||
* Even if a queue, say Q, is expired when it remains idle, Q
|
||||
* can still preempt the new in-service queue if the next
|
||||
@@ -3557,11 +3625,7 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
|
||||
* idling allows the internal queues of the device to contain
|
||||
* many requests, and thus to reorder requests, we can rather
|
||||
* safely assume that the internal scheduler still preserves a
|
||||
* minimum of mid-term fairness. The motivation for using
|
||||
* preemption instead of idling is that, by not idling,
|
||||
* service guarantees are preserved without minimally
|
||||
* sacrificing throughput. In other words, both a high
|
||||
* throughput and its desired distribution are obtained.
|
||||
* minimum of mid-term fairness.
|
||||
*
|
||||
* More precisely, this preemption-based, idleless approach
|
||||
* provides fairness in terms of IOPS, and not sectors per
|
||||
@@ -3580,27 +3644,28 @@ static bool bfq_better_to_idle(struct bfq_queue *bfqq)
|
||||
* 1024/8 times as high as the service received by the other
|
||||
* queue.
|
||||
*
|
||||
* On the other hand, device idling is performed, and thus
|
||||
* pure sector-domain guarantees are provided, for the
|
||||
* following queues, which are likely to need stronger
|
||||
* throughput guarantees: weight-raised queues, and queues
|
||||
* with a higher weight than other queues. When such queues
|
||||
* are active, sub-condition (i) is false, which triggers
|
||||
* device idling.
|
||||
* The motivation for using preemption instead of idling (for
|
||||
* queues with the same weight) is that, by not idling,
|
||||
* service guarantees are preserved (completely or at least in
|
||||
* part) without minimally sacrificing throughput. And, if
|
||||
* there is no active group, then the primary expectation for
|
||||
* this device is probably a high throughput.
|
||||
*
|
||||
* According to the above considerations, the next variable is
|
||||
* true (only) if sub-condition (i) holds. To compute the
|
||||
* value of this variable, we not only use the return value of
|
||||
* the function bfq_symmetric_scenario(), but also check
|
||||
* whether bfqq is being weight-raised, because
|
||||
* bfq_symmetric_scenario() does not take into account also
|
||||
* weight-raised queues (see comments on
|
||||
* bfq_weights_tree_add()). In particular, if bfqq is being
|
||||
* weight-raised, it is important to idle only if there are
|
||||
* other, non-weight-raised queues that may steal throughput
|
||||
* to bfqq. Actually, we should be even more precise, and
|
||||
* differentiate between interactive weight raising and
|
||||
* soft real-time weight raising.
|
||||
* We are now left only with explaining the additional
|
||||
* compound condition that is checked below for deciding
|
||||
* whether the scenario is asymmetric. To explain this
|
||||
* compound condition, we need to add that the function
|
||||
* bfq_symmetric_scenario checks the weights of only
|
||||
* non-weight-raised queues, for efficiency reasons (see
|
||||
* comments on bfq_weights_tree_add()). Then the fact that
|
||||
* bfqq is weight-raised is checked explicitly here. More
|
||||
* precisely, the compound condition below takes into account
|
||||
* also the fact that, even if bfqq is being weight-raised,
|
||||
* the scenario is still symmetric if all queues with requests
|
||||
* waiting for completion happen to be
|
||||
* weight-raised. Actually, we should be even more precise
|
||||
* here, and differentiate between interactive weight raising
|
||||
* and soft real-time weight raising.
|
||||
*
|
||||
* As a side note, it is worth considering that the above
|
||||
* device-idling countermeasures may however fail in the
|
||||
@@ -5422,7 +5487,7 @@ static int bfq_init_queue(struct request_queue *q, struct elevator_type *e)
|
||||
bfqd->idle_slice_timer.function = bfq_idle_slice_timer;
|
||||
|
||||
bfqd->queue_weights_tree = RB_ROOT;
|
||||
bfqd->group_weights_tree = RB_ROOT;
|
||||
bfqd->num_groups_with_pending_reqs = 0;
|
||||
|
||||
INIT_LIST_HEAD(&bfqd->active_list);
|
||||
INIT_LIST_HEAD(&bfqd->idle_list);
|
||||
|
||||
@@ -108,15 +108,14 @@ struct bfq_sched_data {
|
||||
};
|
||||
|
||||
/**
|
||||
* struct bfq_weight_counter - counter of the number of all active entities
|
||||
* struct bfq_weight_counter - counter of the number of all active queues
|
||||
* with a given weight.
|
||||
*/
|
||||
struct bfq_weight_counter {
|
||||
unsigned int weight; /* weight of the entities this counter refers to */
|
||||
unsigned int num_active; /* nr of active entities with this weight */
|
||||
unsigned int weight; /* weight of the queues this counter refers to */
|
||||
unsigned int num_active; /* nr of active queues with this weight */
|
||||
/*
|
||||
* Weights tree member (see bfq_data's @queue_weights_tree and
|
||||
* @group_weights_tree)
|
||||
* Weights tree member (see bfq_data's @queue_weights_tree)
|
||||
*/
|
||||
struct rb_node weights_node;
|
||||
};
|
||||
@@ -151,8 +150,6 @@ struct bfq_weight_counter {
|
||||
struct bfq_entity {
|
||||
/* service_tree member */
|
||||
struct rb_node rb_node;
|
||||
/* pointer to the weight counter associated with this entity */
|
||||
struct bfq_weight_counter *weight_counter;
|
||||
|
||||
/*
|
||||
* Flag, true if the entity is on a tree (either the active or
|
||||
@@ -199,6 +196,9 @@ struct bfq_entity {
|
||||
|
||||
/* flag, set to request a weight, ioprio or ioprio_class change */
|
||||
int prio_changed;
|
||||
|
||||
/* flag, set if the entity is counted in groups_with_pending_reqs */
|
||||
bool in_groups_with_pending_reqs;
|
||||
};
|
||||
|
||||
struct bfq_group;
|
||||
@@ -266,6 +266,9 @@ struct bfq_queue {
|
||||
/* entity representing this queue in the scheduler */
|
||||
struct bfq_entity entity;
|
||||
|
||||
/* pointer to the weight counter associated with this entity */
|
||||
struct bfq_weight_counter *weight_counter;
|
||||
|
||||
/* maximum budget allowed from the feedback mechanism */
|
||||
int max_budget;
|
||||
/* budget expiration (in jiffies) */
|
||||
@@ -448,15 +451,54 @@ struct bfq_data {
|
||||
* bfq_weights_tree_[add|remove] for further details).
|
||||
*/
|
||||
struct rb_root queue_weights_tree;
|
||||
|
||||
/*
|
||||
* rbtree of non-queue @bfq_entity weight counters, sorted by
|
||||
* weight. Used to keep track of whether all @bfq_groups have
|
||||
* the same weight. The tree contains one counter for each
|
||||
* distinct weight associated to some active @bfq_group (see
|
||||
* the comments to the functions bfq_weights_tree_[add|remove]
|
||||
* for further details).
|
||||
* Number of groups with at least one descendant process that
|
||||
* has at least one request waiting for completion. Note that
|
||||
* this accounts for also requests already dispatched, but not
|
||||
* yet completed. Therefore this number of groups may differ
|
||||
* (be larger) than the number of active groups, as a group is
|
||||
* considered active only if its corresponding entity has
|
||||
* descendant queues with at least one request queued. This
|
||||
* number is used to decide whether a scenario is symmetric.
|
||||
* For a detailed explanation see comments on the computation
|
||||
* of the variable asymmetric_scenario in the function
|
||||
* bfq_better_to_idle().
|
||||
*
|
||||
* However, it is hard to compute this number exactly, for
|
||||
* groups with multiple descendant processes. Consider a group
|
||||
* that is inactive, i.e., that has no descendant process with
|
||||
* pending I/O inside BFQ queues. Then suppose that
|
||||
* num_groups_with_pending_reqs is still accounting for this
|
||||
* group, because the group has descendant processes with some
|
||||
* I/O request still in flight. num_groups_with_pending_reqs
|
||||
* should be decremented when the in-flight request of the
|
||||
* last descendant process is finally completed (assuming that
|
||||
* nothing else has changed for the group in the meantime, in
|
||||
* terms of composition of the group and active/inactive state of child
|
||||
* groups and processes). To accomplish this, an additional
|
||||
* pending-request counter must be added to entities, and must
|
||||
* be updated correctly. To avoid this additional field and operations,
|
||||
* we resort to the following tradeoff between simplicity and
|
||||
* accuracy: for an inactive group that is still counted in
|
||||
* num_groups_with_pending_reqs, we decrement
|
||||
* num_groups_with_pending_reqs when the first descendant
|
||||
* process of the group remains with no request waiting for
|
||||
* completion.
|
||||
*
|
||||
* Even this simpler decrement strategy requires a little
|
||||
* carefulness: to avoid multiple decrements, we flag a group,
|
||||
* more precisely an entity representing a group, as still
|
||||
* counted in num_groups_with_pending_reqs when it becomes
|
||||
* inactive. Then, when the first descendant queue of the
|
||||
* entity remains with no request waiting for completion,
|
||||
* num_groups_with_pending_reqs is decremented, and this flag
|
||||
* is reset. After this flag is reset for the entity,
|
||||
* num_groups_with_pending_reqs won't be decremented any
|
||||
* longer in case a new descendant queue of the entity remains
|
||||
* with no request waiting for completion.
|
||||
*/
|
||||
struct rb_root group_weights_tree;
|
||||
unsigned int num_groups_with_pending_reqs;
|
||||
|
||||
/*
|
||||
* Number of bfq_queues containing requests (including the
|
||||
@@ -854,10 +896,10 @@ struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync);
|
||||
void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync);
|
||||
struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic);
|
||||
void bfq_pos_tree_add_move(struct bfq_data *bfqd, struct bfq_queue *bfqq);
|
||||
void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_entity *entity,
|
||||
void bfq_weights_tree_add(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
struct rb_root *root);
|
||||
void __bfq_weights_tree_remove(struct bfq_data *bfqd,
|
||||
struct bfq_entity *entity,
|
||||
struct bfq_queue *bfqq,
|
||||
struct rb_root *root);
|
||||
void bfq_weights_tree_remove(struct bfq_data *bfqd,
|
||||
struct bfq_queue *bfqq);
|
||||
@@ -951,7 +993,7 @@ bool __bfq_deactivate_entity(struct bfq_entity *entity,
|
||||
bool ins_into_idle_tree);
|
||||
bool next_queue_may_preempt(struct bfq_data *bfqd);
|
||||
struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd);
|
||||
void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
|
||||
bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd);
|
||||
void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
bool ins_into_idle_tree, bool expiration);
|
||||
void bfq_activate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq);
|
||||
|
||||
@@ -788,25 +788,23 @@ __bfq_entity_update_weight_prio(struct bfq_service_tree *old_st,
|
||||
new_weight = entity->orig_weight *
|
||||
(bfqq ? bfqq->wr_coeff : 1);
|
||||
/*
|
||||
* If the weight of the entity changes, remove the entity
|
||||
* from its old weight counter (if there is a counter
|
||||
* associated with the entity), and add it to the counter
|
||||
* associated with its new weight.
|
||||
* If the weight of the entity changes, and the entity is a
|
||||
* queue, remove the entity from its old weight counter (if
|
||||
* there is a counter associated with the entity).
|
||||
*/
|
||||
if (prev_weight != new_weight) {
|
||||
root = bfqq ? &bfqd->queue_weights_tree :
|
||||
&bfqd->group_weights_tree;
|
||||
__bfq_weights_tree_remove(bfqd, entity, root);
|
||||
if (prev_weight != new_weight && bfqq) {
|
||||
root = &bfqd->queue_weights_tree;
|
||||
__bfq_weights_tree_remove(bfqd, bfqq, root);
|
||||
}
|
||||
entity->weight = new_weight;
|
||||
/*
|
||||
* Add the entity to its weights tree only if it is
|
||||
* not associated with a weight-raised queue.
|
||||
* Add the entity, if it is not a weight-raised queue,
|
||||
* to the counter associated with its new weight.
|
||||
*/
|
||||
if (prev_weight != new_weight &&
|
||||
(bfqq ? bfqq->wr_coeff == 1 : 1))
|
||||
if (prev_weight != new_weight && bfqq && bfqq->wr_coeff == 1) {
|
||||
/* If we get here, root has been initialized. */
|
||||
bfq_weights_tree_add(bfqd, entity, root);
|
||||
bfq_weights_tree_add(bfqd, bfqq, root);
|
||||
}
|
||||
|
||||
new_st->wsum += entity->weight;
|
||||
|
||||
@@ -1012,9 +1010,12 @@ static void __bfq_activate_entity(struct bfq_entity *entity,
|
||||
if (!bfq_entity_to_bfqq(entity)) { /* bfq_group */
|
||||
struct bfq_group *bfqg =
|
||||
container_of(entity, struct bfq_group, entity);
|
||||
struct bfq_data *bfqd = bfqg->bfqd;
|
||||
|
||||
bfq_weights_tree_add(bfqg->bfqd, entity,
|
||||
&bfqd->group_weights_tree);
|
||||
if (!entity->in_groups_with_pending_reqs) {
|
||||
entity->in_groups_with_pending_reqs = true;
|
||||
bfqd->num_groups_with_pending_reqs++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1599,7 +1600,8 @@ struct bfq_queue *bfq_get_next_queue(struct bfq_data *bfqd)
|
||||
return bfqq;
|
||||
}
|
||||
|
||||
void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
|
||||
/* returns true if the in-service queue gets freed */
|
||||
bool __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
|
||||
{
|
||||
struct bfq_queue *in_serv_bfqq = bfqd->in_service_queue;
|
||||
struct bfq_entity *in_serv_entity = &in_serv_bfqq->entity;
|
||||
@@ -1623,8 +1625,20 @@ void __bfq_bfqd_reset_in_service(struct bfq_data *bfqd)
|
||||
* service tree either, then release the service reference to
|
||||
* the queue it represents (taken with bfq_get_entity).
|
||||
*/
|
||||
if (!in_serv_entity->on_st)
|
||||
if (!in_serv_entity->on_st) {
|
||||
/*
|
||||
* If no process is referencing in_serv_bfqq any
|
||||
* longer, then the service reference may be the only
|
||||
* reference to the queue. If this is the case, then
|
||||
* bfqq gets freed here.
|
||||
*/
|
||||
int ref = in_serv_bfqq->ref;
|
||||
bfq_put_queue(in_serv_bfqq);
|
||||
if (ref == 1)
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void bfq_deactivate_bfqq(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
@@ -1667,15 +1681,15 @@ void bfq_del_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq,
|
||||
|
||||
bfqd->busy_queues--;
|
||||
|
||||
if (!bfqq->dispatched)
|
||||
bfq_weights_tree_remove(bfqd, bfqq);
|
||||
|
||||
if (bfqq->wr_coeff > 1)
|
||||
bfqd->wr_busy_queues--;
|
||||
|
||||
bfqg_stats_update_dequeue(bfqq_group(bfqq));
|
||||
|
||||
bfq_deactivate_bfqq(bfqd, bfqq, true, expiration);
|
||||
|
||||
if (!bfqq->dispatched)
|
||||
bfq_weights_tree_remove(bfqd, bfqq);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1692,7 +1706,7 @@ void bfq_add_bfqq_busy(struct bfq_data *bfqd, struct bfq_queue *bfqq)
|
||||
|
||||
if (!bfqq->dispatched)
|
||||
if (bfqq->wr_coeff == 1)
|
||||
bfq_weights_tree_add(bfqd, &bfqq->entity,
|
||||
bfq_weights_tree_add(bfqd, bfqq,
|
||||
&bfqd->queue_weights_tree);
|
||||
|
||||
if (bfqq->wr_coeff > 1)
|
||||
|
||||
Reference in New Issue
Block a user