This is the 4.19.64 stable release

-----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCAAdFiEEZH8oZUiU471FcZm+ONu9yGCSaT4FAl1GibIACgkQONu9yGCS
 aT7z2hAAmv8AsH9IG43m7t6zLroJVswr/9594xk7yPBQgcY3/PW2aTFBCFbsdOL4
 yXcj2PSwRiq9K6qAJULrvOvncR9fIILHqzWzyXnoaZ30lR/FxaaFmuHZX/5Ix1tB
 e5EEE/EA49UAEjEDaMLq8g2IvibsReDxmSpnXyBJWoyRAdFIElVnMJ2+zvP/wRhF
 NKzQj/bj/qecCbis2lUCaVWJFZ6+P/52UbD8lvIwqR3nk2TKsGDcLU6eY3yg4KrB
 rEHl5T8KIPrkX3KNIEB8EcFREene+rdpZLLVe4fYwf+gOqfiFXSzZZvweauMkplq
 ehlVHkykvQvlsVM2tjBD379z3C4aasZDuMVNMCbAy2FlruLeBQ7gEn77mCJB9VH5
 /n/mlc2yizdoowtARCLWOUMfASpdSbqu2SQ7A/3kwG7l6GrpzKSIU2nQgm+41sUZ
 QJVtZ3IYsPoYjnU4B3JZzgJnf3M9jcRz/3JegviqhSEbF1gaScJX0cqN8C1idN/v
 ZAGCJK9S20/EEEsp5jn+bq2grUehvmD4TVDfot4P+5yRYyBIhMFpbM2RpjydOpwy
 +x8D1Q34LYPFgZfQ0vF62vcSBhMBiJ/7j41rUeo44K+Lg00F3yCOyL6FxK6S8h6j
 wsD0xLbllMrhV5KRYFizb3QbCHoHYiROIJk76uLvB+Tqq2Jg9VQ=
 =qIi2
 -----END PGP SIGNATURE-----

Merge 4.19.64 into android-4.19-q

Changes in 4.19.64
	hv_sock: Add support for delayed close
	vsock: correct removal of socket from the list
	NFS: Fix dentry revalidation on NFSv4 lookup
	NFS: Refactor nfs_lookup_revalidate()
	NFSv4: Fix lookup revalidate of regular files
	usb: dwc2: Disable all EP's on disconnect
	usb: dwc2: Fix disable all EP's on disconnect
	arm64: compat: Provide definition for COMPAT_SIGMINSTKSZ
	binder: fix possible UAF when freeing buffer
	ISDN: hfcsusb: checking idx of ep configuration
	media: au0828: fix null dereference in error path
	ath10k: Change the warning message string
	media: cpia2_usb: first wake up, then free in disconnect
	media: pvrusb2: use a different format for warnings
	NFS: Cleanup if nfs_match_client is interrupted
	media: radio-raremono: change devm_k*alloc to k*alloc
	iommu/vt-d: Don't queue_iova() if there is no flush queue
	iommu/iova: Fix compilation error with !CONFIG_IOMMU_IOVA
	Bluetooth: hci_uart: check for missing tty operations
	vhost: introduce vhost_exceeds_weight()
	vhost_net: fix possible infinite loop
	vhost: vsock: add weight support
	vhost: scsi: add weight support
	sched/fair: Don't free p->numa_faults with concurrent readers
	sched/fair: Use RCU accessors consistently for ->numa_group
	/proc/<pid>/cmdline: remove all the special cases
	/proc/<pid>/cmdline: add back the setproctitle() special case
	drivers/pps/pps.c: clear offset flags in PPS_SETPARAMS ioctl
	Fix allyesconfig output.
	ceph: hold i_ceph_lock when removing caps for freeing inode
	block, scsi: Change the preempt-only flag into a counter
	scsi: core: Avoid that a kernel warning appears during system resume
	ip_tunnel: allow not to count pkts on tstats by setting skb's dev to NULL
	Linux 4.19.64

Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
Change-Id: Ide69316db2c4e7d5e6a028f7a259c1e3de49478e
This commit is contained in:
Greg Kroah-Hartman 2019-08-04 09:36:43 +02:00
commit 571263b109
46 changed files with 727 additions and 431 deletions

View file

@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
VERSION = 4
PATCHLEVEL = 19
SUBLEVEL = 63
SUBLEVEL = 64
EXTRAVERSION =
NAME = "People's Front"

View file

@ -159,6 +159,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
}
#define compat_user_stack_pointer() (user_stack_pointer(task_pt_regs(current)))
#define COMPAT_MINSIGSTKSZ 2048
static inline void __user *arch_compat_alloc_user_space(long len)
{

View file

@ -8,27 +8,19 @@ config SH_ALPHA_BOARD
bool
config SH_DEVICE_TREE
bool "Board Described by Device Tree"
bool
select OF
select OF_EARLY_FLATTREE
select TIMER_OF
select COMMON_CLK
select GENERIC_CALIBRATE_DELAY
help
Select Board Described by Device Tree to build a kernel that
does not hard-code any board-specific knowledge but instead uses
a device tree blob provided by the boot-loader. You must enable
drivers for any hardware you want to use separately. At this
time, only boards based on the open-hardware J-Core processors
have sufficient driver coverage to use this option; do not
select it if you are using original SuperH hardware.
config SH_JCORE_SOC
bool "J-Core SoC"
depends on SH_DEVICE_TREE && (CPU_SH2 || CPU_J2)
select SH_DEVICE_TREE
select CLKSRC_JCORE_PIT
select JCORE_AIC
default y if CPU_J2
depends on CPU_J2
help
Select this option to include drivers core components of the
J-Core SoC, including interrupt controllers and timers.

View file

@ -421,24 +421,25 @@ void blk_sync_queue(struct request_queue *q)
EXPORT_SYMBOL(blk_sync_queue);
/**
* blk_set_preempt_only - set QUEUE_FLAG_PREEMPT_ONLY
* blk_set_pm_only - increment pm_only counter
* @q: request queue pointer
*
* Returns the previous value of the PREEMPT_ONLY flag - 0 if the flag was not
* set and 1 if the flag was already set.
*/
int blk_set_preempt_only(struct request_queue *q)
void blk_set_pm_only(struct request_queue *q)
{
return blk_queue_flag_test_and_set(QUEUE_FLAG_PREEMPT_ONLY, q);
atomic_inc(&q->pm_only);
}
EXPORT_SYMBOL_GPL(blk_set_preempt_only);
EXPORT_SYMBOL_GPL(blk_set_pm_only);
void blk_clear_preempt_only(struct request_queue *q)
void blk_clear_pm_only(struct request_queue *q)
{
blk_queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
wake_up_all(&q->mq_freeze_wq);
int pm_only;
pm_only = atomic_dec_return(&q->pm_only);
WARN_ON_ONCE(pm_only < 0);
if (pm_only == 0)
wake_up_all(&q->mq_freeze_wq);
}
EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
EXPORT_SYMBOL_GPL(blk_clear_pm_only);
/**
* __blk_run_queue_uncond - run a queue whether or not it has been stopped
@ -916,7 +917,7 @@ EXPORT_SYMBOL(blk_alloc_queue);
*/
int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
{
const bool preempt = flags & BLK_MQ_REQ_PREEMPT;
const bool pm = flags & BLK_MQ_REQ_PREEMPT;
while (true) {
bool success = false;
@ -924,11 +925,11 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
rcu_read_lock();
if (percpu_ref_tryget_live(&q->q_usage_counter)) {
/*
* The code that sets the PREEMPT_ONLY flag is
* responsible for ensuring that that flag is globally
* visible before the queue is unfrozen.
* The code that increments the pm_only counter is
* responsible for ensuring that that counter is
* globally visible before the queue is unfrozen.
*/
if (preempt || !blk_queue_preempt_only(q)) {
if (pm || !blk_queue_pm_only(q)) {
success = true;
} else {
percpu_ref_put(&q->q_usage_counter);
@ -953,7 +954,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
wait_event(q->mq_freeze_wq,
(atomic_read(&q->mq_freeze_depth) == 0 &&
(preempt || !blk_queue_preempt_only(q))) ||
(pm || !blk_queue_pm_only(q))) ||
blk_queue_dying(q));
if (blk_queue_dying(q))
return -ENODEV;

View file

@ -102,6 +102,14 @@ static int blk_flags_show(struct seq_file *m, const unsigned long flags,
return 0;
}
static int queue_pm_only_show(void *data, struct seq_file *m)
{
struct request_queue *q = data;
seq_printf(m, "%d\n", atomic_read(&q->pm_only));
return 0;
}
#define QUEUE_FLAG_NAME(name) [QUEUE_FLAG_##name] = #name
static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(QUEUED),
@ -132,7 +140,6 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(REGISTERED),
QUEUE_FLAG_NAME(SCSI_PASSTHROUGH),
QUEUE_FLAG_NAME(QUIESCED),
QUEUE_FLAG_NAME(PREEMPT_ONLY),
};
#undef QUEUE_FLAG_NAME
@ -209,6 +216,7 @@ static ssize_t queue_write_hint_store(void *data, const char __user *buf,
static const struct blk_mq_debugfs_attr blk_mq_debugfs_queue_attrs[] = {
{ "poll_stat", 0400, queue_poll_stat_show },
{ "requeue_list", 0400, .seq_ops = &queue_requeue_list_seq_ops },
{ "pm_only", 0600, queue_pm_only_show, NULL },
{ "state", 0600, queue_state_show, queue_state_write },
{ "write_hints", 0600, queue_write_hint_show, queue_write_hint_store },
{ "zone_wlock", 0400, queue_zone_wlock_show, NULL },

View file

@ -2138,8 +2138,18 @@ static struct binder_thread *binder_get_txn_from_and_acq_inner(
static void binder_free_transaction(struct binder_transaction *t)
{
if (t->buffer)
t->buffer->transaction = NULL;
struct binder_proc *target_proc = t->to_proc;
if (target_proc) {
binder_inner_proc_lock(target_proc);
if (t->buffer)
t->buffer->transaction = NULL;
binder_inner_proc_unlock(target_proc);
}
/*
* If the transaction has no target_proc, then
* t->buffer->transaction has already been cleared.
*/
kfree(t);
binder_stats_deleted(BINDER_STAT_TRANSACTION);
}
@ -3848,10 +3858,12 @@ static int binder_thread_write(struct binder_proc *proc,
buffer->debug_id,
buffer->transaction ? "active" : "finished");
binder_inner_proc_lock(proc);
if (buffer->transaction) {
buffer->transaction->buffer = NULL;
buffer->transaction = NULL;
}
binder_inner_proc_unlock(proc);
if (buffer->async_transaction && buffer->target_node) {
struct binder_node *buf_node;
struct binder_work *w;

View file

@ -112,6 +112,9 @@ static int ath_open(struct hci_uart *hu)
BT_DBG("hu %p", hu);
if (!hci_uart_has_flow_control(hu))
return -EOPNOTSUPP;
ath = kzalloc(sizeof(*ath), GFP_KERNEL);
if (!ath)
return -ENOMEM;

View file

@ -369,6 +369,9 @@ static int bcm_open(struct hci_uart *hu)
bt_dev_dbg(hu->hdev, "hu %p", hu);
if (!hci_uart_has_flow_control(hu))
return -EOPNOTSUPP;
bcm = kzalloc(sizeof(*bcm), GFP_KERNEL);
if (!bcm)
return -ENOMEM;

View file

@ -406,6 +406,9 @@ static int intel_open(struct hci_uart *hu)
BT_DBG("hu %p", hu);
if (!hci_uart_has_flow_control(hu))
return -EOPNOTSUPP;
intel = kzalloc(sizeof(*intel), GFP_KERNEL);
if (!intel)
return -ENOMEM;

View file

@ -299,6 +299,19 @@ static int hci_uart_send_frame(struct hci_dev *hdev, struct sk_buff *skb)
return 0;
}
/* Check the underlying device or tty has flow control support */
bool hci_uart_has_flow_control(struct hci_uart *hu)
{
/* serdev nodes check if the needed operations are present */
if (hu->serdev)
return true;
if (hu->tty->driver->ops->tiocmget && hu->tty->driver->ops->tiocmset)
return true;
return false;
}
/* Flow control or un-flow control the device */
void hci_uart_set_flow_control(struct hci_uart *hu, bool enable)
{

View file

@ -66,6 +66,9 @@ static int mrvl_open(struct hci_uart *hu)
BT_DBG("hu %p", hu);
if (!hci_uart_has_flow_control(hu))
return -EOPNOTSUPP;
mrvl = kzalloc(sizeof(*mrvl), GFP_KERNEL);
if (!mrvl)
return -ENOMEM;

View file

@ -450,6 +450,9 @@ static int qca_open(struct hci_uart *hu)
BT_DBG("hu %p qca_open", hu);
if (!hci_uart_has_flow_control(hu))
return -EOPNOTSUPP;
qca = kzalloc(sizeof(struct qca_data), GFP_KERNEL);
if (!qca)
return -ENOMEM;

View file

@ -118,6 +118,7 @@ int hci_uart_tx_wakeup(struct hci_uart *hu);
int hci_uart_init_ready(struct hci_uart *hu);
void hci_uart_init_work(struct work_struct *work);
void hci_uart_set_baudrate(struct hci_uart *hu, unsigned int speed);
bool hci_uart_has_flow_control(struct hci_uart *hu);
void hci_uart_set_flow_control(struct hci_uart *hu, bool enable);
void hci_uart_set_speeds(struct hci_uart *hu, unsigned int init_speed,
unsigned int oper_speed);

View file

@ -3721,7 +3721,7 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size)
freelist = domain_unmap(domain, start_pfn, last_pfn);
if (intel_iommu_strict) {
if (intel_iommu_strict || !has_iova_flush_queue(&domain->iovad)) {
iommu_flush_iotlb_psi(iommu, domain, start_pfn,
nrpages, !freelist, 0);
/* free iova */

View file

@ -65,9 +65,14 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule,
}
EXPORT_SYMBOL_GPL(init_iova_domain);
bool has_iova_flush_queue(struct iova_domain *iovad)
{
return !!iovad->fq;
}
static void free_iova_flush_queue(struct iova_domain *iovad)
{
if (!iovad->fq)
if (!has_iova_flush_queue(iovad))
return;
if (timer_pending(&iovad->fq_timer))
@ -85,13 +90,14 @@ static void free_iova_flush_queue(struct iova_domain *iovad)
int init_iova_flush_queue(struct iova_domain *iovad,
iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
{
struct iova_fq __percpu *queue;
int cpu;
atomic64_set(&iovad->fq_flush_start_cnt, 0);
atomic64_set(&iovad->fq_flush_finish_cnt, 0);
iovad->fq = alloc_percpu(struct iova_fq);
if (!iovad->fq)
queue = alloc_percpu(struct iova_fq);
if (!queue)
return -ENOMEM;
iovad->flush_cb = flush_cb;
@ -100,13 +106,17 @@ int init_iova_flush_queue(struct iova_domain *iovad,
for_each_possible_cpu(cpu) {
struct iova_fq *fq;
fq = per_cpu_ptr(iovad->fq, cpu);
fq = per_cpu_ptr(queue, cpu);
fq->head = 0;
fq->tail = 0;
spin_lock_init(&fq->lock);
}
smp_wmb();
iovad->fq = queue;
timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
atomic_set(&iovad->fq_timer_on, 0);

View file

@ -1967,6 +1967,9 @@ hfcsusb_probe(struct usb_interface *intf, const struct usb_device_id *id)
/* get endpoint base */
idx = ((ep_addr & 0x7f) - 1) * 2;
if (idx > 15)
return -EIO;
if (ep_addr & 0x80)
idx++;
attr = ep->desc.bmAttributes;

View file

@ -271,6 +271,14 @@ static int vidioc_g_frequency(struct file *file, void *priv,
return 0;
}
static void raremono_device_release(struct v4l2_device *v4l2_dev)
{
struct raremono_device *radio = to_raremono_dev(v4l2_dev);
kfree(radio->buffer);
kfree(radio);
}
/* File system interface */
static const struct v4l2_file_operations usb_raremono_fops = {
.owner = THIS_MODULE,
@ -295,12 +303,14 @@ static int usb_raremono_probe(struct usb_interface *intf,
struct raremono_device *radio;
int retval = 0;
radio = devm_kzalloc(&intf->dev, sizeof(struct raremono_device), GFP_KERNEL);
if (radio)
radio->buffer = devm_kmalloc(&intf->dev, BUFFER_LENGTH, GFP_KERNEL);
if (!radio || !radio->buffer)
radio = kzalloc(sizeof(*radio), GFP_KERNEL);
if (!radio)
return -ENOMEM;
radio->buffer = kmalloc(BUFFER_LENGTH, GFP_KERNEL);
if (!radio->buffer) {
kfree(radio);
return -ENOMEM;
}
radio->usbdev = interface_to_usbdev(intf);
radio->intf = intf;
@ -324,7 +334,8 @@ static int usb_raremono_probe(struct usb_interface *intf,
if (retval != 3 ||
(get_unaligned_be16(&radio->buffer[1]) & 0xfff) == 0x0242) {
dev_info(&intf->dev, "this is not Thanko's Raremono.\n");
return -ENODEV;
retval = -ENODEV;
goto free_mem;
}
dev_info(&intf->dev, "Thanko's Raremono connected: (%04X:%04X)\n",
@ -333,7 +344,7 @@ static int usb_raremono_probe(struct usb_interface *intf,
retval = v4l2_device_register(&intf->dev, &radio->v4l2_dev);
if (retval < 0) {
dev_err(&intf->dev, "couldn't register v4l2_device\n");
return retval;
goto free_mem;
}
mutex_init(&radio->lock);
@ -345,6 +356,7 @@ static int usb_raremono_probe(struct usb_interface *intf,
radio->vdev.ioctl_ops = &usb_raremono_ioctl_ops;
radio->vdev.lock = &radio->lock;
radio->vdev.release = video_device_release_empty;
radio->v4l2_dev.release = raremono_device_release;
usb_set_intfdata(intf, &radio->v4l2_dev);
@ -360,6 +372,10 @@ static int usb_raremono_probe(struct usb_interface *intf,
}
dev_err(&intf->dev, "could not register video device\n");
v4l2_device_unregister(&radio->v4l2_dev);
free_mem:
kfree(radio->buffer);
kfree(radio);
return retval;
}

View file

@ -623,6 +623,12 @@ static int au0828_usb_probe(struct usb_interface *interface,
/* Setup */
au0828_card_setup(dev);
/*
* Store the pointer to the au0828_dev so it can be accessed in
* au0828_usb_disconnect
*/
usb_set_intfdata(interface, dev);
/* Analog TV */
retval = au0828_analog_register(dev, interface);
if (retval) {
@ -641,12 +647,6 @@ static int au0828_usb_probe(struct usb_interface *interface,
/* Remote controller */
au0828_rc_register(dev);
/*
* Store the pointer to the au0828_dev so it can be accessed in
* au0828_usb_disconnect
*/
usb_set_intfdata(interface, dev);
pr_info("Registered device AU0828 [%s]\n",
dev->board.name == NULL ? "Unset" : dev->board.name);

View file

@ -902,7 +902,6 @@ static void cpia2_usb_disconnect(struct usb_interface *intf)
cpia2_unregister_camera(cam);
v4l2_device_disconnect(&cam->v4l2_dev);
mutex_unlock(&cam->v4l2_lock);
v4l2_device_put(&cam->v4l2_dev);
if(cam->buffers) {
DBG("Wakeup waiting processes\n");
@ -911,6 +910,8 @@ static void cpia2_usb_disconnect(struct usb_interface *intf)
wake_up_interruptible(&cam->wq_stream);
}
v4l2_device_put(&cam->v4l2_dev);
LOG("CPiA2 camera disconnected.\n");
}

View file

@ -1680,7 +1680,7 @@ static int pvr2_decoder_enable(struct pvr2_hdw *hdw,int enablefl)
}
if (!hdw->flag_decoder_missed) {
pvr2_trace(PVR2_TRACE_ERROR_LEGS,
"WARNING: No decoder present");
"***WARNING*** No decoder present");
hdw->flag_decoder_missed = !0;
trace_stbit("flag_decoder_missed",
hdw->flag_decoder_missed);
@ -2366,7 +2366,7 @@ struct pvr2_hdw *pvr2_hdw_create(struct usb_interface *intf,
if (hdw_desc->flag_is_experimental) {
pvr2_trace(PVR2_TRACE_INFO, "**********");
pvr2_trace(PVR2_TRACE_INFO,
"WARNING: Support for this device (%s) is experimental.",
"***WARNING*** Support for this device (%s) is experimental.",
hdw_desc->description);
pvr2_trace(PVR2_TRACE_INFO,
"Important functionality might not be entirely working.");

View file

@ -343,11 +343,11 @@ static int i2c_hack_cx25840(struct pvr2_hdw *hdw,
if ((ret != 0) || (*rdata == 0x04) || (*rdata == 0x0a)) {
pvr2_trace(PVR2_TRACE_ERROR_LEGS,
"WARNING: Detected a wedged cx25840 chip; the device will not work.");
"***WARNING*** Detected a wedged cx25840 chip; the device will not work.");
pvr2_trace(PVR2_TRACE_ERROR_LEGS,
"WARNING: Try power cycling the pvrusb2 device.");
"***WARNING*** Try power cycling the pvrusb2 device.");
pvr2_trace(PVR2_TRACE_ERROR_LEGS,
"WARNING: Disabling further access to the device to prevent other foul-ups.");
"***WARNING*** Disabling further access to the device to prevent other foul-ups.");
// This blocks all further communication with the part.
hdw->i2c_func[0x44] = NULL;
pvr2_hdw_render_useless(hdw);

View file

@ -353,7 +353,7 @@ struct v4l2_standard *pvr2_std_create_enum(unsigned int *countptr,
bcnt = pvr2_std_id_to_str(buf,sizeof(buf),fmsk);
pvr2_trace(
PVR2_TRACE_ERROR_LEGS,
"WARNING: Failed to classify the following standard(s): %.*s",
"***WARNING*** Failed to classify the following standard(s): %.*s",
bcnt,buf);
}

View file

@ -1025,7 +1025,7 @@ static int ath10k_usb_probe(struct usb_interface *interface,
}
/* TODO: remove this once USB support is fully implemented */
ath10k_warn(ar, "WARNING: ath10k USB support is incomplete, don't expect anything to work!\n");
ath10k_warn(ar, "Warning: ath10k USB support is incomplete, don't expect anything to work!\n");
return 0;

View file

@ -166,6 +166,14 @@ static long pps_cdev_ioctl(struct file *file,
pps->params.mode |= PPS_CANWAIT;
pps->params.api_version = PPS_API_VERS;
/*
* Clear unused fields of pps_kparams to avoid leaking
* uninitialized data of the PPS_SETPARAMS caller via
* PPS_GETPARAMS
*/
pps->params.assert_off_tu.flags = 0;
pps->params.clear_off_tu.flags = 0;
spin_unlock_irq(&pps->lock);
break;

View file

@ -3059,11 +3059,14 @@ scsi_device_quiesce(struct scsi_device *sdev)
*/
WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current);
blk_set_preempt_only(q);
if (sdev->quiesced_by == current)
return 0;
blk_set_pm_only(q);
blk_mq_freeze_queue(q);
/*
* Ensure that the effect of blk_set_preempt_only() will be visible
* Ensure that the effect of blk_set_pm_only() will be visible
* for percpu_ref_tryget() callers that occur after the queue
* unfreeze even if the queue was already frozen before this function
* was called. See also https://lwn.net/Articles/573497/.
@ -3076,7 +3079,7 @@ scsi_device_quiesce(struct scsi_device *sdev)
if (err == 0)
sdev->quiesced_by = current;
else
blk_clear_preempt_only(q);
blk_clear_pm_only(q);
mutex_unlock(&sdev->state_mutex);
return err;
@ -3099,8 +3102,10 @@ void scsi_device_resume(struct scsi_device *sdev)
* device deleted during suspend)
*/
mutex_lock(&sdev->state_mutex);
sdev->quiesced_by = NULL;
blk_clear_preempt_only(sdev->request_queue);
if (sdev->quiesced_by) {
sdev->quiesced_by = NULL;
blk_clear_pm_only(sdev->request_queue);
}
if (sdev->sdev_state == SDEV_QUIESCE)
scsi_device_set_state(sdev, SDEV_RUNNING);
mutex_unlock(&sdev->state_mutex);

View file

@ -3125,6 +3125,7 @@ void dwc2_hsotg_disconnect(struct dwc2_hsotg *hsotg)
hsotg->connected = 0;
hsotg->test_mode = 0;
/* all endpoints should be shutdown */
for (ep = 0; ep < hsotg->num_of_eps; ep++) {
if (hsotg->eps_in[ep])
kill_all_requests(hsotg, hsotg->eps_in[ep],
@ -3175,6 +3176,7 @@ static void dwc2_hsotg_irq_fifoempty(struct dwc2_hsotg *hsotg, bool periodic)
GINTSTS_PTXFEMP | \
GINTSTS_RXFLVL)
static int dwc2_hsotg_ep_disable(struct usb_ep *ep);
/**
* dwc2_hsotg_core_init - issue softreset to the core
* @hsotg: The device state
@ -3189,13 +3191,23 @@ void dwc2_hsotg_core_init_disconnected(struct dwc2_hsotg *hsotg,
u32 val;
u32 usbcfg;
u32 dcfg = 0;
int ep;
/* Kill any ep0 requests as controller will be reinitialized */
kill_all_requests(hsotg, hsotg->eps_out[0], -ECONNRESET);
if (!is_usb_reset)
if (!is_usb_reset) {
if (dwc2_core_reset(hsotg, true))
return;
} else {
/* all endpoints should be shutdown */
for (ep = 1; ep < hsotg->num_of_eps; ep++) {
if (hsotg->eps_in[ep])
dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep);
if (hsotg->eps_out[ep])
dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep);
}
}
/*
* we must now enable ep0 ready for host detection and then
@ -3993,7 +4005,6 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep)
struct dwc2_hsotg *hsotg = hs_ep->parent;
int dir_in = hs_ep->dir_in;
int index = hs_ep->index;
unsigned long flags;
u32 epctrl_reg;
u32 ctrl;
@ -4011,8 +4022,6 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep)
epctrl_reg = dir_in ? DIEPCTL(index) : DOEPCTL(index);
spin_lock_irqsave(&hsotg->lock, flags);
ctrl = dwc2_readl(hsotg, epctrl_reg);
if (ctrl & DXEPCTL_EPENA)
@ -4035,10 +4044,22 @@ static int dwc2_hsotg_ep_disable(struct usb_ep *ep)
hs_ep->fifo_index = 0;
hs_ep->fifo_size = 0;
spin_unlock_irqrestore(&hsotg->lock, flags);
return 0;
}
static int dwc2_hsotg_ep_disable_lock(struct usb_ep *ep)
{
struct dwc2_hsotg_ep *hs_ep = our_ep(ep);
struct dwc2_hsotg *hsotg = hs_ep->parent;
unsigned long flags;
int ret;
spin_lock_irqsave(&hsotg->lock, flags);
ret = dwc2_hsotg_ep_disable(ep);
spin_unlock_irqrestore(&hsotg->lock, flags);
return ret;
}
/**
* on_list - check request is on the given endpoint
* @ep: The endpoint to check.
@ -4186,7 +4207,7 @@ static int dwc2_hsotg_ep_sethalt_lock(struct usb_ep *ep, int value)
static const struct usb_ep_ops dwc2_hsotg_ep_ops = {
.enable = dwc2_hsotg_ep_enable,
.disable = dwc2_hsotg_ep_disable,
.disable = dwc2_hsotg_ep_disable_lock,
.alloc_request = dwc2_hsotg_ep_alloc_request,
.free_request = dwc2_hsotg_ep_free_request,
.queue = dwc2_hsotg_ep_queue_lock,
@ -4326,9 +4347,9 @@ static int dwc2_hsotg_udc_stop(struct usb_gadget *gadget)
/* all endpoints should be shutdown */
for (ep = 1; ep < hsotg->num_of_eps; ep++) {
if (hsotg->eps_in[ep])
dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep);
dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep);
if (hsotg->eps_out[ep])
dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep);
dwc2_hsotg_ep_disable_lock(&hsotg->eps_out[ep]->ep);
}
spin_lock_irqsave(&hsotg->lock, flags);
@ -4776,9 +4797,9 @@ int dwc2_hsotg_suspend(struct dwc2_hsotg *hsotg)
for (ep = 0; ep < hsotg->num_of_eps; ep++) {
if (hsotg->eps_in[ep])
dwc2_hsotg_ep_disable(&hsotg->eps_in[ep]->ep);
dwc2_hsotg_ep_disable_lock(&hsotg->eps_in[ep]->ep);
if (hsotg->eps_out[ep])
dwc2_hsotg_ep_disable(&hsotg->eps_out[ep]->ep);
dwc2_hsotg_ep_disable_lock(&hsotg->eps_out[ep]->ep);
}
}

View file

@ -497,12 +497,6 @@ static size_t init_iov_iter(struct vhost_virtqueue *vq, struct iov_iter *iter,
return iov_iter_count(iter);
}
static bool vhost_exceeds_weight(int pkts, int total_len)
{
return total_len >= VHOST_NET_WEIGHT ||
pkts >= VHOST_NET_PKT_WEIGHT;
}
static int get_tx_bufs(struct vhost_net *net,
struct vhost_net_virtqueue *nvq,
struct msghdr *msg,
@ -557,7 +551,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
int err;
int sent_pkts = 0;
for (;;) {
do {
bool busyloop_intr = false;
head = get_tx_bufs(net, nvq, &msg, &out, &in, &len,
@ -598,11 +592,7 @@ static void handle_tx_copy(struct vhost_net *net, struct socket *sock)
err, len);
if (++nvq->done_idx >= VHOST_NET_BATCH)
vhost_net_signal_used(nvq);
if (vhost_exceeds_weight(++sent_pkts, total_len)) {
vhost_poll_queue(&vq->poll);
break;
}
}
} while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
vhost_net_signal_used(nvq);
}
@ -626,7 +616,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
bool zcopy_used;
int sent_pkts = 0;
for (;;) {
do {
bool busyloop_intr;
/* Release DMAs done buffers first */
@ -701,11 +691,7 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
else
vhost_zerocopy_signal_used(net, vq);
vhost_net_tx_packet(net);
if (unlikely(vhost_exceeds_weight(++sent_pkts, total_len))) {
vhost_poll_queue(&vq->poll);
break;
}
}
} while (likely(!vhost_exceeds_weight(vq, ++sent_pkts, total_len)));
}
/* Expects to be always run from workqueue - which acts as
@ -941,8 +927,11 @@ static void handle_rx(struct vhost_net *net)
vq->log : NULL;
mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
while ((sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
&busyloop_intr))) {
do {
sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
&busyloop_intr);
if (!sock_len)
break;
sock_len += sock_hlen;
vhost_len = sock_len + vhost_hlen;
headcount = get_rx_bufs(vq, vq->heads + nvq->done_idx,
@ -1027,14 +1016,11 @@ static void handle_rx(struct vhost_net *net)
vhost_log_write(vq, vq_log, log, vhost_len,
vq->iov, in);
total_len += vhost_len;
if (unlikely(vhost_exceeds_weight(++recv_pkts, total_len))) {
vhost_poll_queue(&vq->poll);
goto out;
}
}
} while (likely(!vhost_exceeds_weight(vq, ++recv_pkts, total_len)));
if (unlikely(busyloop_intr))
vhost_poll_queue(&vq->poll);
else
else if (!sock_len)
vhost_net_enable_vq(net, vq);
out:
vhost_net_signal_used(nvq);
@ -1115,7 +1101,8 @@ static int vhost_net_open(struct inode *inode, struct file *f)
vhost_net_buf_init(&n->vqs[i].rxq);
}
vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX,
UIO_MAXIOV + VHOST_NET_BATCH);
UIO_MAXIOV + VHOST_NET_BATCH,
VHOST_NET_PKT_WEIGHT, VHOST_NET_WEIGHT);
vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev);
vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev);

View file

@ -57,6 +57,12 @@
#define VHOST_SCSI_PREALLOC_UPAGES 2048
#define VHOST_SCSI_PREALLOC_PROT_SGLS 2048
/* Max number of requests before requeueing the job.
* Using this limit prevents one virtqueue from starving others with
* request.
*/
#define VHOST_SCSI_WEIGHT 256
struct vhost_scsi_inflight {
/* Wait for the flush operation to finish */
struct completion comp;
@ -811,7 +817,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
u64 tag;
u32 exp_data_len, data_direction;
unsigned int out = 0, in = 0;
int head, ret, prot_bytes;
int head, ret, prot_bytes, c = 0;
size_t req_size, rsp_size = sizeof(struct virtio_scsi_cmd_resp);
size_t out_size, in_size;
u16 lun;
@ -830,7 +836,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
vhost_disable_notify(&vs->dev, vq);
for (;;) {
do {
head = vhost_get_vq_desc(vq, vq->iov,
ARRAY_SIZE(vq->iov), &out, &in,
NULL, NULL);
@ -1045,7 +1051,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
*/
INIT_WORK(&cmd->work, vhost_scsi_submission_work);
queue_work(vhost_scsi_workqueue, &cmd->work);
}
} while (likely(!vhost_exceeds_weight(vq, ++c, 0)));
out:
mutex_unlock(&vq->mutex);
}
@ -1398,7 +1404,8 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
vqs[i] = &vs->vqs[i].vq;
vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
}
vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV);
vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV,
VHOST_SCSI_WEIGHT, 0);
vhost_scsi_init_inflight(vs, NULL);

View file

@ -413,8 +413,24 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev)
vhost_vq_free_iovecs(dev->vqs[i]);
}
bool vhost_exceeds_weight(struct vhost_virtqueue *vq,
int pkts, int total_len)
{
struct vhost_dev *dev = vq->dev;
if ((dev->byte_weight && total_len >= dev->byte_weight) ||
pkts >= dev->weight) {
vhost_poll_queue(&vq->poll);
return true;
}
return false;
}
EXPORT_SYMBOL_GPL(vhost_exceeds_weight);
void vhost_dev_init(struct vhost_dev *dev,
struct vhost_virtqueue **vqs, int nvqs, int iov_limit)
struct vhost_virtqueue **vqs, int nvqs,
int iov_limit, int weight, int byte_weight)
{
struct vhost_virtqueue *vq;
int i;
@ -428,6 +444,8 @@ void vhost_dev_init(struct vhost_dev *dev,
dev->mm = NULL;
dev->worker = NULL;
dev->iov_limit = iov_limit;
dev->weight = weight;
dev->byte_weight = byte_weight;
init_llist_head(&dev->work_list);
init_waitqueue_head(&dev->wait);
INIT_LIST_HEAD(&dev->read_list);

View file

@ -171,10 +171,13 @@ struct vhost_dev {
struct list_head pending_list;
wait_queue_head_t wait;
int iov_limit;
int weight;
int byte_weight;
};
bool vhost_exceeds_weight(struct vhost_virtqueue *vq, int pkts, int total_len);
void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs,
int nvqs, int iov_limit);
int nvqs, int iov_limit, int weight, int byte_weight);
long vhost_dev_set_owner(struct vhost_dev *dev);
bool vhost_dev_has_owner(struct vhost_dev *dev);
long vhost_dev_check_owner(struct vhost_dev *);

View file

@ -21,6 +21,14 @@
#include "vhost.h"
#define VHOST_VSOCK_DEFAULT_HOST_CID 2
/* Max number of bytes transferred before requeueing the job.
* Using this limit prevents one virtqueue from starving others. */
#define VHOST_VSOCK_WEIGHT 0x80000
/* Max number of packets transferred before requeueing the job.
* Using this limit prevents one virtqueue from starving others with
* small pkts.
*/
#define VHOST_VSOCK_PKT_WEIGHT 256
enum {
VHOST_VSOCK_FEATURES = VHOST_FEATURES,
@ -78,6 +86,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
struct vhost_virtqueue *vq)
{
struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
int pkts = 0, total_len = 0;
bool added = false;
bool restart_tx = false;
@ -89,7 +98,7 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
/* Avoid further vmexits, we're already processing the virtqueue */
vhost_disable_notify(&vsock->dev, vq);
for (;;) {
do {
struct virtio_vsock_pkt *pkt;
struct iov_iter iov_iter;
unsigned out, in;
@ -174,8 +183,9 @@ vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
*/
virtio_transport_deliver_tap_pkt(pkt);
total_len += pkt->len;
virtio_transport_free_pkt(pkt);
}
} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
if (added)
vhost_signal(&vsock->dev, vq);
@ -350,7 +360,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
dev);
struct virtio_vsock_pkt *pkt;
int head;
int head, pkts = 0, total_len = 0;
unsigned int out, in;
bool added = false;
@ -360,7 +370,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
goto out;
vhost_disable_notify(&vsock->dev, vq);
for (;;) {
do {
u32 len;
if (!vhost_vsock_more_replies(vsock)) {
@ -401,9 +411,11 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
else
virtio_transport_free_pkt(pkt);
vhost_add_used(vq, head, sizeof(pkt->hdr) + len);
len += sizeof(pkt->hdr);
vhost_add_used(vq, head, len);
total_len += len;
added = true;
}
} while(likely(!vhost_exceeds_weight(vq, ++pkts, total_len)));
no_more_replies:
if (added)
@ -531,7 +543,9 @@ static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;
vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), UIO_MAXIOV);
vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
VHOST_VSOCK_WEIGHT);
file->private_data = vsock;
spin_lock_init(&vsock->send_pkt_list_lock);

View file

@ -1237,20 +1237,23 @@ static int send_cap_msg(struct cap_msg_args *arg)
}
/*
* Queue cap releases when an inode is dropped from our cache. Since
* inode is about to be destroyed, there is no need for i_ceph_lock.
* Queue cap releases when an inode is dropped from our cache.
*/
void ceph_queue_caps_release(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct rb_node *p;
/* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU)
* may call __ceph_caps_issued_mask() on a freeing inode. */
spin_lock(&ci->i_ceph_lock);
p = rb_first(&ci->i_caps);
while (p) {
struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
p = rb_next(p);
__ceph_remove_cap(cap, true);
}
spin_unlock(&ci->i_ceph_lock);
}
/*

View file

@ -1826,7 +1826,7 @@ static int __do_execve_file(int fd, struct filename *filename,
membarrier_execve(current);
rseq_execve(current);
acct_update_integrals(current);
task_numa_free(current);
task_numa_free(current, false);
free_bprm(bprm);
kfree(pathbuf);
if (filename)

View file

@ -416,10 +416,10 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
clp = nfs_match_client(cl_init);
if (clp) {
spin_unlock(&nn->nfs_client_lock);
if (IS_ERR(clp))
return clp;
if (new)
new->rpc_ops->free_client(new);
if (IS_ERR(clp))
return clp;
return nfs_found_client(cl_init, clp);
}
if (new) {

View file

@ -1073,6 +1073,100 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
}
static int
nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
struct inode *inode, int error)
{
switch (error) {
case 1:
dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
__func__, dentry);
return 1;
case 0:
nfs_mark_for_revalidate(dir);
if (inode && S_ISDIR(inode->i_mode)) {
/* Purge readdir caches. */
nfs_zap_caches(inode);
/*
* We can't d_drop the root of a disconnected tree:
* its d_hash is on the s_anon list and d_drop() would hide
* it from shrink_dcache_for_unmount(), leading to busy
* inodes on unmount and further oopses.
*/
if (IS_ROOT(dentry))
return 1;
}
dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
__func__, dentry);
return 0;
}
dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
__func__, dentry, error);
return error;
}
static int
nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
int ret = 1;
if (nfs_neg_need_reval(dir, dentry, flags)) {
if (flags & LOOKUP_RCU)
return -ECHILD;
ret = 0;
}
return nfs_lookup_revalidate_done(dir, dentry, NULL, ret);
}
static int
nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
struct inode *inode)
{
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
}
static int
nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
struct inode *inode)
{
struct nfs_fh *fhandle;
struct nfs_fattr *fattr;
struct nfs4_label *label;
int ret;
ret = -ENOMEM;
fhandle = nfs_alloc_fhandle();
fattr = nfs_alloc_fattr();
label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
if (fhandle == NULL || fattr == NULL || IS_ERR(label))
goto out;
ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
if (ret < 0) {
if (ret == -ESTALE || ret == -ENOENT)
ret = 0;
goto out;
}
ret = 0;
if (nfs_compare_fh(NFS_FH(inode), fhandle))
goto out;
if (nfs_refresh_inode(inode, fattr) < 0)
goto out;
nfs_setsecurity(inode, fattr, label);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
/* set a readdirplus hint that we had a cache miss */
nfs_force_use_readdirplus(dir);
ret = 1;
out:
nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle);
nfs4_label_free(label);
return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
}
/*
* This is called every time the dcache has a lookup hit,
* and we should check whether we can really trust that
@ -1084,58 +1178,36 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
* If the parent directory is seen to have changed, we throw out the
* cached dentry and do a new lookup.
*/
static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
static int
nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct inode *dir;
struct inode *inode;
struct dentry *parent;
struct nfs_fh *fhandle = NULL;
struct nfs_fattr *fattr = NULL;
struct nfs4_label *label = NULL;
int error;
if (flags & LOOKUP_RCU) {
parent = READ_ONCE(dentry->d_parent);
dir = d_inode_rcu(parent);
if (!dir)
return -ECHILD;
} else {
parent = dget_parent(dentry);
dir = d_inode(parent);
}
nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
inode = d_inode(dentry);
if (!inode) {
if (nfs_neg_need_reval(dir, dentry, flags)) {
if (flags & LOOKUP_RCU)
return -ECHILD;
goto out_bad;
}
goto out_valid;
}
if (!inode)
return nfs_lookup_revalidate_negative(dir, dentry, flags);
if (is_bad_inode(inode)) {
if (flags & LOOKUP_RCU)
return -ECHILD;
dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
__func__, dentry);
goto out_bad;
}
if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
goto out_set_verifier;
return nfs_lookup_revalidate_delegated(dir, dentry, inode);
/* Force a full look up iff the parent directory has changed */
if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) &&
nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
error = nfs_lookup_verify_inode(inode, flags);
if (error) {
if (flags & LOOKUP_RCU)
return -ECHILD;
if (error == -ESTALE)
goto out_zap_parent;
goto out_error;
nfs_zap_caches(dir);
goto out_bad;
}
nfs_advise_use_readdirplus(dir);
goto out_valid;
@ -1147,81 +1219,45 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
if (NFS_STALE(inode))
goto out_bad;
error = -ENOMEM;
fhandle = nfs_alloc_fhandle();
fattr = nfs_alloc_fattr();
if (fhandle == NULL || fattr == NULL)
goto out_error;
label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
if (IS_ERR(label))
goto out_error;
trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
error = nfs_lookup_revalidate_dentry(dir, dentry, inode);
trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
if (error == -ESTALE || error == -ENOENT)
goto out_bad;
if (error)
goto out_error;
if (nfs_compare_fh(NFS_FH(inode), fhandle))
goto out_bad;
if ((error = nfs_refresh_inode(inode, fattr)) != 0)
goto out_bad;
return error;
out_valid:
return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
out_bad:
if (flags & LOOKUP_RCU)
return -ECHILD;
return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
}
nfs_setsecurity(inode, fattr, label);
static int
__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
int (*reval)(struct inode *, struct dentry *, unsigned int))
{
struct dentry *parent;
struct inode *dir;
int ret;
nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle);
nfs4_label_free(label);
/* set a readdirplus hint that we had a cache miss */
nfs_force_use_readdirplus(dir);
out_set_verifier:
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
out_valid:
if (flags & LOOKUP_RCU) {
parent = READ_ONCE(dentry->d_parent);
dir = d_inode_rcu(parent);
if (!dir)
return -ECHILD;
ret = reval(dir, dentry, flags);
if (parent != READ_ONCE(dentry->d_parent))
return -ECHILD;
} else
} else {
parent = dget_parent(dentry);
ret = reval(d_inode(parent), dentry, flags);
dput(parent);
dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
__func__, dentry);
return 1;
out_zap_parent:
nfs_zap_caches(dir);
out_bad:
WARN_ON(flags & LOOKUP_RCU);
nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle);
nfs4_label_free(label);
nfs_mark_for_revalidate(dir);
if (inode && S_ISDIR(inode->i_mode)) {
/* Purge readdir caches. */
nfs_zap_caches(inode);
/*
* We can't d_drop the root of a disconnected tree:
* its d_hash is on the s_anon list and d_drop() would hide
* it from shrink_dcache_for_unmount(), leading to busy
* inodes on unmount and further oopses.
*/
if (IS_ROOT(dentry))
goto out_valid;
}
dput(parent);
dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
__func__, dentry);
return 0;
out_error:
WARN_ON(flags & LOOKUP_RCU);
nfs_free_fattr(fattr);
nfs_free_fhandle(fhandle);
nfs4_label_free(label);
dput(parent);
dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
__func__, dentry, error);
return error;
return ret;
}
static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{
return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
}
/*
@ -1580,62 +1616,55 @@ no_open:
}
EXPORT_SYMBOL_GPL(nfs_atomic_open);
static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
static int
nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct inode *inode;
int ret = 0;
if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
goto no_open;
goto full_reval;
if (d_mountpoint(dentry))
goto no_open;
if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1)
goto no_open;
goto full_reval;
inode = d_inode(dentry);
/* We can't create new files in nfs_open_revalidate(), so we
* optimize away revalidation of negative dentries.
*/
if (inode == NULL) {
struct dentry *parent;
struct inode *dir;
if (inode == NULL)
goto full_reval;
if (flags & LOOKUP_RCU) {
parent = READ_ONCE(dentry->d_parent);
dir = d_inode_rcu(parent);
if (!dir)
return -ECHILD;
} else {
parent = dget_parent(dentry);
dir = d_inode(parent);
}
if (!nfs_neg_need_reval(dir, dentry, flags))
ret = 1;
else if (flags & LOOKUP_RCU)
ret = -ECHILD;
if (!(flags & LOOKUP_RCU))
dput(parent);
else if (parent != READ_ONCE(dentry->d_parent))
return -ECHILD;
goto out;
}
if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
return nfs_lookup_revalidate_delegated(dir, dentry, inode);
/* NFS only supports OPEN on regular files */
if (!S_ISREG(inode->i_mode))
goto no_open;
goto full_reval;
/* We cannot do exclusive creation on a positive dentry */
if (flags & LOOKUP_EXCL)
goto no_open;
if (flags & (LOOKUP_EXCL | LOOKUP_REVAL))
goto reval_dentry;
/* Check if the directory changed */
if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU))
goto reval_dentry;
/* Let f_op->open() actually open (and revalidate) the file */
ret = 1;
return 1;
reval_dentry:
if (flags & LOOKUP_RCU)
return -ECHILD;
return nfs_lookup_revalidate_dentry(dir, dentry, inode);;
out:
return ret;
full_reval:
return nfs_do_lookup_revalidate(dir, dentry, flags);
}
no_open:
return nfs_lookup_revalidate(dentry, flags);
static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{
return __nfs_lookup_revalidate(dentry, flags,
nfs4_do_lookup_revalidate);
}
#endif /* CONFIG_NFSV4 */

View file

@ -1355,12 +1355,20 @@ static bool nfs4_mode_match_open_stateid(struct nfs4_state *state,
return false;
}
static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode)
static int can_open_cached(struct nfs4_state *state, fmode_t mode,
int open_mode, enum open_claim_type4 claim)
{
int ret = 0;
if (open_mode & (O_EXCL|O_TRUNC))
goto out;
switch (claim) {
case NFS4_OPEN_CLAIM_NULL:
case NFS4_OPEN_CLAIM_FH:
goto out;
default:
break;
}
switch (mode & (FMODE_READ|FMODE_WRITE)) {
case FMODE_READ:
ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0
@ -1753,7 +1761,7 @@ static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
for (;;) {
spin_lock(&state->owner->so_lock);
if (can_open_cached(state, fmode, open_mode)) {
if (can_open_cached(state, fmode, open_mode, claim)) {
update_open_stateflags(state, fmode);
spin_unlock(&state->owner->so_lock);
goto out_return_state;
@ -2282,7 +2290,8 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
if (data->state != NULL) {
struct nfs_delegation *delegation;
if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags))
if (can_open_cached(data->state, data->o_arg.fmode,
data->o_arg.open_flags, claim))
goto out_no_action;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);

View file

@ -206,12 +206,53 @@ static int proc_root_link(struct dentry *dentry, struct path *path)
return result;
}
/*
* If the user used setproctitle(), we just get the string from
* user space at arg_start, and limit it to a maximum of one page.
*/
static ssize_t get_mm_proctitle(struct mm_struct *mm, char __user *buf,
size_t count, unsigned long pos,
unsigned long arg_start)
{
char *page;
int ret, got;
if (pos >= PAGE_SIZE)
return 0;
page = (char *)__get_free_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
ret = 0;
got = access_remote_vm(mm, arg_start, page, PAGE_SIZE, FOLL_ANON);
if (got > 0) {
int len = strnlen(page, got);
/* Include the NUL character if it was found */
if (len < got)
len++;
if (len > pos) {
len -= pos;
if (len > count)
len = count;
len -= copy_to_user(buf, page+pos, len);
if (!len)
len = -EFAULT;
ret = len;
}
}
free_page((unsigned long)page);
return ret;
}
static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
size_t count, loff_t *ppos)
{
unsigned long arg_start, arg_end, env_start, env_end;
unsigned long pos, len;
char *page;
char *page, c;
/* Check if process spawned far enough to have cmdline. */
if (!mm->env_end)
@ -228,28 +269,42 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
return 0;
/*
* We have traditionally allowed the user to re-write
* the argument strings and overflow the end result
* into the environment section. But only do that if
* the environment area is contiguous to the arguments.
* We allow setproctitle() to overwrite the argument
* strings, and overflow past the original end. But
* only when it overflows into the environment area.
*/
if (env_start != arg_end || env_start >= env_end)
if (env_start != arg_end || env_end < env_start)
env_start = env_end = arg_end;
/* .. and limit it to a maximum of one page of slop */
if (env_end >= arg_end + PAGE_SIZE)
env_end = arg_end + PAGE_SIZE - 1;
len = env_end - arg_start;
/* We're not going to care if "*ppos" has high bits set */
pos = arg_start + *ppos;
/* .. but we do check the result is in the proper range */
if (pos < arg_start || pos >= env_end)
pos = *ppos;
if (pos >= len)
return 0;
if (count > len - pos)
count = len - pos;
if (!count)
return 0;
/* .. and we never go past env_end */
if (env_end - pos < count)
count = env_end - pos;
/*
* Magical special case: if the argv[] end byte is not
* zero, the user has overwritten it with setproctitle(3).
*
* Possible future enhancement: do this only once when
* pos is 0, and set a flag in the 'struct file'.
*/
if (access_remote_vm(mm, arg_end-1, &c, 1, FOLL_ANON) == 1 && c)
return get_mm_proctitle(mm, buf, count, pos, arg_start);
/*
* For the non-setproctitle() case we limit things strictly
* to the [arg_start, arg_end[ range.
*/
pos += arg_start;
if (pos < arg_start || pos >= arg_end)
return 0;
if (count > arg_end - pos)
count = arg_end - pos;
page = (char *)__get_free_page(GFP_KERNEL);
if (!page)
@ -259,48 +314,11 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
while (count) {
int got;
size_t size = min_t(size_t, PAGE_SIZE, count);
long offset;
/*
* Are we already starting past the official end?
* We always include the last byte that is *supposed*
* to be NUL
*/
offset = (pos >= arg_end) ? pos - arg_end + 1 : 0;
got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON);
if (got <= offset)
got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
if (got <= 0)
break;
got -= offset;
/* Don't walk past a NUL character once you hit arg_end */
if (pos + got >= arg_end) {
int n = 0;
/*
* If we started before 'arg_end' but ended up
* at or after it, we start the NUL character
* check at arg_end-1 (where we expect the normal
* EOF to be).
*
* NOTE! This is smaller than 'got', because
* pos + got >= arg_end
*/
if (pos < arg_end)
n = arg_end - pos - 1;
/* Cut off at first NUL after 'n' */
got = n + strnlen(page+n, offset+got-n);
if (got < offset)
break;
got -= offset;
/* Include the NUL if it existed */
if (got < size)
got++;
}
got -= copy_to_user(buf, page+offset, got);
got -= copy_to_user(buf, page, got);
if (unlikely(!got)) {
if (!len)
len = -EFAULT;

View file

@ -504,6 +504,12 @@ struct request_queue {
* various queue flags, see QUEUE_* below
*/
unsigned long queue_flags;
/*
* Number of contexts that have called blk_set_pm_only(). If this
* counter is above zero then only RQF_PM and RQF_PREEMPT requests are
* processed.
*/
atomic_t pm_only;
/*
* ida allocated id for this queue. Used to index queues from
@ -698,7 +704,6 @@ struct request_queue {
#define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */
#define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */
#define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */
#define QUEUE_FLAG_PREEMPT_ONLY 29 /* only process REQ_PREEMPT requests */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_SAME_COMP) | \
@ -736,12 +741,11 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q);
((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
REQ_FAILFAST_DRIVER))
#define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
#define blk_queue_preempt_only(q) \
test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags)
#define blk_queue_pm_only(q) atomic_read(&(q)->pm_only)
#define blk_queue_fua(q) test_bit(QUEUE_FLAG_FUA, &(q)->queue_flags)
extern int blk_set_preempt_only(struct request_queue *q);
extern void blk_clear_preempt_only(struct request_queue *q);
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);
static inline int queue_in_flight(struct request_queue *q)
{

View file

@ -156,6 +156,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo,
void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
unsigned long start_pfn);
bool has_iova_flush_queue(struct iova_domain *iovad);
int init_iova_flush_queue(struct iova_domain *iovad,
iova_flush_cb flush_cb, iova_entry_dtor entry_dtor);
struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
@ -236,6 +237,11 @@ static inline void init_iova_domain(struct iova_domain *iovad,
{
}
static inline bool has_iova_flush_queue(struct iova_domain *iovad)
{
return false;
}
static inline int init_iova_flush_queue(struct iova_domain *iovad,
iova_flush_cb flush_cb,
iova_entry_dtor entry_dtor)

View file

@ -1026,7 +1026,15 @@ struct task_struct {
u64 last_sum_exec_runtime;
struct callback_head numa_work;
struct numa_group *numa_group;
/*
* This pointer is only modified for current in syscall and
* pagefault context (and for tasks being destroyed), so it can be read
* from any of the following contexts:
* - RCU read-side critical section
* - current->numa_group from everywhere
* - task's runqueue locked, task not running
*/
struct numa_group __rcu *numa_group;
/*
* numa_faults is an array split into four regions:

View file

@ -19,7 +19,7 @@
extern void task_numa_fault(int last_node, int node, int pages, int flags);
extern pid_t task_numa_group_id(struct task_struct *p);
extern void set_numabalancing_state(bool enabled);
extern void task_numa_free(struct task_struct *p);
extern void task_numa_free(struct task_struct *p, bool final);
extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
int src_nid, int dst_cpu);
#else
@ -34,7 +34,7 @@ static inline pid_t task_numa_group_id(struct task_struct *p)
static inline void set_numabalancing_state(bool enabled)
{
}
static inline void task_numa_free(struct task_struct *p)
static inline void task_numa_free(struct task_struct *p, bool final)
{
}
static inline bool should_numa_migrate_memory(struct task_struct *p,

View file

@ -682,7 +682,7 @@ void __put_task_struct(struct task_struct *tsk)
WARN_ON(tsk == current);
cgroup_free(tsk);
task_numa_free(tsk);
task_numa_free(tsk, true);
security_task_free(tsk);
exit_creds(tsk);
delayacct_tsk_free(tsk);

View file

@ -1051,6 +1051,21 @@ struct numa_group {
unsigned long faults[0];
};
/*
* For functions that can be called in multiple contexts that permit reading
* ->numa_group (see struct task_struct for locking rules).
*/
static struct numa_group *deref_task_numa_group(struct task_struct *p)
{
return rcu_dereference_check(p->numa_group, p == current ||
(lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu)));
}
static struct numa_group *deref_curr_numa_group(struct task_struct *p)
{
return rcu_dereference_protected(p->numa_group, p == current);
}
static inline unsigned long group_faults_priv(struct numa_group *ng);
static inline unsigned long group_faults_shared(struct numa_group *ng);
@ -1094,10 +1109,12 @@ static unsigned int task_scan_start(struct task_struct *p)
{
unsigned long smin = task_scan_min(p);
unsigned long period = smin;
struct numa_group *ng;
/* Scale the maximum scan period with the amount of shared memory. */
if (p->numa_group) {
struct numa_group *ng = p->numa_group;
rcu_read_lock();
ng = rcu_dereference(p->numa_group);
if (ng) {
unsigned long shared = group_faults_shared(ng);
unsigned long private = group_faults_priv(ng);
@ -1105,6 +1122,7 @@ static unsigned int task_scan_start(struct task_struct *p)
period *= shared + 1;
period /= private + shared + 1;
}
rcu_read_unlock();
return max(smin, period);
}
@ -1113,13 +1131,14 @@ static unsigned int task_scan_max(struct task_struct *p)
{
unsigned long smin = task_scan_min(p);
unsigned long smax;
struct numa_group *ng;
/* Watch for min being lower than max due to floor calculations */
smax = sysctl_numa_balancing_scan_period_max / task_nr_scan_windows(p);
/* Scale the maximum scan period with the amount of shared memory. */
if (p->numa_group) {
struct numa_group *ng = p->numa_group;
ng = deref_curr_numa_group(p);
if (ng) {
unsigned long shared = group_faults_shared(ng);
unsigned long private = group_faults_priv(ng);
unsigned long period = smax;
@ -1151,7 +1170,7 @@ void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
p->numa_scan_period = sysctl_numa_balancing_scan_delay;
p->numa_work.next = &p->numa_work;
p->numa_faults = NULL;
p->numa_group = NULL;
RCU_INIT_POINTER(p->numa_group, NULL);
p->last_task_numa_placement = 0;
p->last_sum_exec_runtime = 0;
@ -1198,7 +1217,16 @@ static void account_numa_dequeue(struct rq *rq, struct task_struct *p)
pid_t task_numa_group_id(struct task_struct *p)
{
return p->numa_group ? p->numa_group->gid : 0;
struct numa_group *ng;
pid_t gid = 0;
rcu_read_lock();
ng = rcu_dereference(p->numa_group);
if (ng)
gid = ng->gid;
rcu_read_unlock();
return gid;
}
/*
@ -1223,11 +1251,13 @@ static inline unsigned long task_faults(struct task_struct *p, int nid)
static inline unsigned long group_faults(struct task_struct *p, int nid)
{
if (!p->numa_group)
struct numa_group *ng = deref_task_numa_group(p);
if (!ng)
return 0;
return p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 1)];
return ng->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
ng->faults[task_faults_idx(NUMA_MEM, nid, 1)];
}
static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
@ -1365,12 +1395,13 @@ static inline unsigned long task_weight(struct task_struct *p, int nid,
static inline unsigned long group_weight(struct task_struct *p, int nid,
int dist)
{
struct numa_group *ng = deref_task_numa_group(p);
unsigned long faults, total_faults;
if (!p->numa_group)
if (!ng)
return 0;
total_faults = p->numa_group->total_faults;
total_faults = ng->total_faults;
if (!total_faults)
return 0;
@ -1384,7 +1415,7 @@ static inline unsigned long group_weight(struct task_struct *p, int nid,
bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
int src_nid, int dst_cpu)
{
struct numa_group *ng = p->numa_group;
struct numa_group *ng = deref_curr_numa_group(p);
int dst_nid = cpu_to_node(dst_cpu);
int last_cpupid, this_cpupid;
@ -1589,13 +1620,14 @@ static bool load_too_imbalanced(long src_load, long dst_load,
static void task_numa_compare(struct task_numa_env *env,
long taskimp, long groupimp, bool maymove)
{
struct numa_group *cur_ng, *p_ng = deref_curr_numa_group(env->p);
struct rq *dst_rq = cpu_rq(env->dst_cpu);
long imp = p_ng ? groupimp : taskimp;
struct task_struct *cur;
long src_load, dst_load;
long load;
long imp = env->p->numa_group ? groupimp : taskimp;
long moveimp = imp;
int dist = env->dist;
long moveimp = imp;
long load;
if (READ_ONCE(dst_rq->numa_migrate_on))
return;
@ -1634,21 +1666,22 @@ static void task_numa_compare(struct task_numa_env *env,
* If dst and source tasks are in the same NUMA group, or not
* in any group then look only at task weights.
*/
if (cur->numa_group == env->p->numa_group) {
cur_ng = rcu_dereference(cur->numa_group);
if (cur_ng == p_ng) {
imp = taskimp + task_weight(cur, env->src_nid, dist) -
task_weight(cur, env->dst_nid, dist);
/*
* Add some hysteresis to prevent swapping the
* tasks within a group over tiny differences.
*/
if (cur->numa_group)
if (cur_ng)
imp -= imp / 16;
} else {
/*
* Compare the group weights. If a task is all by itself
* (not part of a group), use the task weight instead.
*/
if (cur->numa_group && env->p->numa_group)
if (cur_ng && p_ng)
imp += group_weight(cur, env->src_nid, dist) -
group_weight(cur, env->dst_nid, dist);
else
@ -1746,11 +1779,12 @@ static int task_numa_migrate(struct task_struct *p)
.best_imp = 0,
.best_cpu = -1,
};
struct sched_domain *sd;
struct rq *best_rq;
unsigned long taskweight, groupweight;
int nid, ret, dist;
struct sched_domain *sd;
long taskimp, groupimp;
struct numa_group *ng;
struct rq *best_rq;
int nid, ret, dist;
/*
* Pick the lowest SD_NUMA domain, as that would have the smallest
@ -1796,7 +1830,8 @@ static int task_numa_migrate(struct task_struct *p)
* multiple NUMA nodes; in order to better consolidate the group,
* we need to check other locations.
*/
if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes > 1)) {
ng = deref_curr_numa_group(p);
if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) {
for_each_online_node(nid) {
if (nid == env.src_nid || nid == p->numa_preferred_nid)
continue;
@ -1829,7 +1864,7 @@ static int task_numa_migrate(struct task_struct *p)
* A task that migrated to a second choice node will be better off
* trying for a better one later. Do not set the preferred node here.
*/
if (p->numa_group) {
if (ng) {
if (env.best_cpu == -1)
nid = env.src_nid;
else
@ -2124,6 +2159,7 @@ static void task_numa_placement(struct task_struct *p)
unsigned long total_faults;
u64 runtime, period;
spinlock_t *group_lock = NULL;
struct numa_group *ng;
/*
* The p->mm->numa_scan_seq field gets updated without
@ -2141,8 +2177,9 @@ static void task_numa_placement(struct task_struct *p)
runtime = numa_get_avg_runtime(p, &period);
/* If the task is part of a group prevent parallel updates to group stats */
if (p->numa_group) {
group_lock = &p->numa_group->lock;
ng = deref_curr_numa_group(p);
if (ng) {
group_lock = &ng->lock;
spin_lock_irq(group_lock);
}
@ -2183,7 +2220,7 @@ static void task_numa_placement(struct task_struct *p)
p->numa_faults[cpu_idx] += f_diff;
faults += p->numa_faults[mem_idx];
p->total_numa_faults += diff;
if (p->numa_group) {
if (ng) {
/*
* safe because we can only change our own group
*
@ -2191,14 +2228,14 @@ static void task_numa_placement(struct task_struct *p)
* nid and priv in a specific region because it
* is at the beginning of the numa_faults array.
*/
p->numa_group->faults[mem_idx] += diff;
p->numa_group->faults_cpu[mem_idx] += f_diff;
p->numa_group->total_faults += diff;
group_faults += p->numa_group->faults[mem_idx];
ng->faults[mem_idx] += diff;
ng->faults_cpu[mem_idx] += f_diff;
ng->total_faults += diff;
group_faults += ng->faults[mem_idx];
}
}
if (!p->numa_group) {
if (!ng) {
if (faults > max_faults) {
max_faults = faults;
max_nid = nid;
@ -2209,8 +2246,8 @@ static void task_numa_placement(struct task_struct *p)
}
}
if (p->numa_group) {
numa_group_count_active_nodes(p->numa_group);
if (ng) {
numa_group_count_active_nodes(ng);
spin_unlock_irq(group_lock);
max_nid = preferred_group_nid(p, max_nid);
}
@ -2244,7 +2281,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
int cpu = cpupid_to_cpu(cpupid);
int i;
if (unlikely(!p->numa_group)) {
if (unlikely(!deref_curr_numa_group(p))) {
unsigned int size = sizeof(struct numa_group) +
4*nr_node_ids*sizeof(unsigned long);
@ -2280,7 +2317,7 @@ static void task_numa_group(struct task_struct *p, int cpupid, int flags,
if (!grp)
goto no_join;
my_grp = p->numa_group;
my_grp = deref_curr_numa_group(p);
if (grp == my_grp)
goto no_join;
@ -2342,13 +2379,24 @@ no_join:
return;
}
void task_numa_free(struct task_struct *p)
/*
* Get rid of NUMA staticstics associated with a task (either current or dead).
* If @final is set, the task is dead and has reached refcount zero, so we can
* safely free all relevant data structures. Otherwise, there might be
* concurrent reads from places like load balancing and procfs, and we should
* reset the data back to default state without freeing ->numa_faults.
*/
void task_numa_free(struct task_struct *p, bool final)
{
struct numa_group *grp = p->numa_group;
void *numa_faults = p->numa_faults;
/* safe: p either is current or is being freed by current */
struct numa_group *grp = rcu_dereference_raw(p->numa_group);
unsigned long *numa_faults = p->numa_faults;
unsigned long flags;
int i;
if (!numa_faults)
return;
if (grp) {
spin_lock_irqsave(&grp->lock, flags);
for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
@ -2361,8 +2409,14 @@ void task_numa_free(struct task_struct *p)
put_numa_group(grp);
}
p->numa_faults = NULL;
kfree(numa_faults);
if (final) {
p->numa_faults = NULL;
kfree(numa_faults);
} else {
p->total_numa_faults = 0;
for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
numa_faults[i] = 0;
}
}
/*
@ -2415,7 +2469,7 @@ void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
* actively using should be counted as local. This allows the
* scan rate to slow down when a workload has settled down.
*/
ng = p->numa_group;
ng = deref_curr_numa_group(p);
if (!priv && !local && ng && ng->active_nodes > 1 &&
numa_is_active_node(cpu_node, ng) &&
numa_is_active_node(mem_node, ng))
@ -11223,18 +11277,22 @@ void show_numa_stats(struct task_struct *p, struct seq_file *m)
{
int node;
unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0;
struct numa_group *ng;
rcu_read_lock();
ng = rcu_dereference(p->numa_group);
for_each_online_node(node) {
if (p->numa_faults) {
tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)];
tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)];
}
if (p->numa_group) {
gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)],
gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)];
if (ng) {
gsf = ng->faults[task_faults_idx(NUMA_MEM, node, 0)],
gpf = ng->faults[task_faults_idx(NUMA_MEM, node, 1)];
}
print_numa_stats(m, node, tsf, tpf, gsf, gpf);
}
rcu_read_unlock();
}
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_SCHED_DEBUG */

View file

@ -89,9 +89,12 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
__ip_select_ident(net, iph, skb_shinfo(skb)->gso_segs ?: 1);
err = ip_local_out(net, sk, skb);
if (unlikely(net_xmit_eval(err)))
pkt_len = 0;
iptunnel_xmit_stats(dev, pkt_len);
if (dev) {
if (unlikely(net_xmit_eval(err)))
pkt_len = 0;
iptunnel_xmit_stats(dev, pkt_len);
}
}
EXPORT_SYMBOL_GPL(iptunnel_xmit);

View file

@ -281,7 +281,8 @@ EXPORT_SYMBOL_GPL(vsock_insert_connected);
void vsock_remove_bound(struct vsock_sock *vsk)
{
spin_lock_bh(&vsock_table_lock);
__vsock_remove_bound(vsk);
if (__vsock_in_bound_table(vsk))
__vsock_remove_bound(vsk);
spin_unlock_bh(&vsock_table_lock);
}
EXPORT_SYMBOL_GPL(vsock_remove_bound);
@ -289,7 +290,8 @@ EXPORT_SYMBOL_GPL(vsock_remove_bound);
void vsock_remove_connected(struct vsock_sock *vsk)
{
spin_lock_bh(&vsock_table_lock);
__vsock_remove_connected(vsk);
if (__vsock_in_connected_table(vsk))
__vsock_remove_connected(vsk);
spin_unlock_bh(&vsock_table_lock);
}
EXPORT_SYMBOL_GPL(vsock_remove_connected);
@ -325,35 +327,10 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
}
EXPORT_SYMBOL_GPL(vsock_find_connected_socket);
static bool vsock_in_bound_table(struct vsock_sock *vsk)
{
bool ret;
spin_lock_bh(&vsock_table_lock);
ret = __vsock_in_bound_table(vsk);
spin_unlock_bh(&vsock_table_lock);
return ret;
}
static bool vsock_in_connected_table(struct vsock_sock *vsk)
{
bool ret;
spin_lock_bh(&vsock_table_lock);
ret = __vsock_in_connected_table(vsk);
spin_unlock_bh(&vsock_table_lock);
return ret;
}
void vsock_remove_sock(struct vsock_sock *vsk)
{
if (vsock_in_bound_table(vsk))
vsock_remove_bound(vsk);
if (vsock_in_connected_table(vsk))
vsock_remove_connected(vsk);
vsock_remove_bound(vsk);
vsock_remove_connected(vsk);
}
EXPORT_SYMBOL_GPL(vsock_remove_sock);
@ -484,8 +461,7 @@ static void vsock_pending_work(struct work_struct *work)
* incoming packets can't find this socket, and to reduce the reference
* count.
*/
if (vsock_in_connected_table(vsk))
vsock_remove_connected(vsk);
vsock_remove_connected(vsk);
sk->sk_state = TCP_CLOSE;

View file

@ -35,6 +35,9 @@
/* The MTU is 16KB per the host side's design */
#define HVS_MTU_SIZE (1024 * 16)
/* How long to wait for graceful shutdown of a connection */
#define HVS_CLOSE_TIMEOUT (8 * HZ)
struct vmpipe_proto_header {
u32 pkt_type;
u32 data_size;
@ -290,19 +293,32 @@ static void hvs_channel_cb(void *ctx)
sk->sk_write_space(sk);
}
static void hvs_do_close_lock_held(struct vsock_sock *vsk,
bool cancel_timeout)
{
struct sock *sk = sk_vsock(vsk);
sock_set_flag(sk, SOCK_DONE);
vsk->peer_shutdown = SHUTDOWN_MASK;
if (vsock_stream_has_data(vsk) <= 0)
sk->sk_state = TCP_CLOSING;
sk->sk_state_change(sk);
if (vsk->close_work_scheduled &&
(!cancel_timeout || cancel_delayed_work(&vsk->close_work))) {
vsk->close_work_scheduled = false;
vsock_remove_sock(vsk);
/* Release the reference taken while scheduling the timeout */
sock_put(sk);
}
}
static void hvs_close_connection(struct vmbus_channel *chan)
{
struct sock *sk = get_per_channel_state(chan);
struct vsock_sock *vsk = vsock_sk(sk);
lock_sock(sk);
sk->sk_state = TCP_CLOSE;
sock_set_flag(sk, SOCK_DONE);
vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN;
sk->sk_state_change(sk);
hvs_do_close_lock_held(vsock_sk(sk), true);
release_sock(sk);
}
@ -445,50 +461,80 @@ static int hvs_connect(struct vsock_sock *vsk)
return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id);
}
static void hvs_shutdown_lock_held(struct hvsock *hvs, int mode)
{
struct vmpipe_proto_header hdr;
if (hvs->fin_sent || !hvs->chan)
return;
/* It can't fail: see hvs_channel_writable_bytes(). */
(void)hvs_send_data(hvs->chan, (struct hvs_send_buf *)&hdr, 0);
hvs->fin_sent = true;
}
static int hvs_shutdown(struct vsock_sock *vsk, int mode)
{
struct sock *sk = sk_vsock(vsk);
struct vmpipe_proto_header hdr;
struct hvs_send_buf *send_buf;
struct hvsock *hvs;
if (!(mode & SEND_SHUTDOWN))
return 0;
lock_sock(sk);
hvs = vsk->trans;
if (hvs->fin_sent)
goto out;
send_buf = (struct hvs_send_buf *)&hdr;
/* It can't fail: see hvs_channel_writable_bytes(). */
(void)hvs_send_data(hvs->chan, send_buf, 0);
hvs->fin_sent = true;
out:
hvs_shutdown_lock_held(vsk->trans, mode);
release_sock(sk);
return 0;
}
static void hvs_close_timeout(struct work_struct *work)
{
struct vsock_sock *vsk =
container_of(work, struct vsock_sock, close_work.work);
struct sock *sk = sk_vsock(vsk);
sock_hold(sk);
lock_sock(sk);
if (!sock_flag(sk, SOCK_DONE))
hvs_do_close_lock_held(vsk, false);
vsk->close_work_scheduled = false;
release_sock(sk);
sock_put(sk);
}
/* Returns true, if it is safe to remove socket; false otherwise */
static bool hvs_close_lock_held(struct vsock_sock *vsk)
{
struct sock *sk = sk_vsock(vsk);
if (!(sk->sk_state == TCP_ESTABLISHED ||
sk->sk_state == TCP_CLOSING))
return true;
if ((sk->sk_shutdown & SHUTDOWN_MASK) != SHUTDOWN_MASK)
hvs_shutdown_lock_held(vsk->trans, SHUTDOWN_MASK);
if (sock_flag(sk, SOCK_DONE))
return true;
/* This reference will be dropped by the delayed close routine */
sock_hold(sk);
INIT_DELAYED_WORK(&vsk->close_work, hvs_close_timeout);
vsk->close_work_scheduled = true;
schedule_delayed_work(&vsk->close_work, HVS_CLOSE_TIMEOUT);
return false;
}
static void hvs_release(struct vsock_sock *vsk)
{
struct sock *sk = sk_vsock(vsk);
struct hvsock *hvs = vsk->trans;
struct vmbus_channel *chan;
bool remove_sock;
lock_sock(sk);
sk->sk_state = TCP_CLOSING;
vsock_remove_sock(vsk);
remove_sock = hvs_close_lock_held(vsk);
release_sock(sk);
chan = hvs->chan;
if (chan)
hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN);
if (remove_sock)
vsock_remove_sock(vsk);
}
static void hvs_destruct(struct vsock_sock *vsk)