On Mon, Mar 03 2025 at 07:05, Daniel Tsai wrote:
> +/*
> + * The algo here honor if there is any intersection of mask of
> + * the existing MSI vectors and the requesting MSI vector. So we
> + * could handle both narrow (1 bit set mask) and wide (0xffff...)
> + * cases, return -EINVAL and reject the request if the result of
> + * cpumask is empty, otherwise return 0 and have the calculated
> + * result on the mask_to_check to pass down to the irq_chip.
> + */
> +static int dw_pci_check_mask_compatibility(struct dw_pcie_rp *pp,
> +					    unsigned long ctrl,
> +					    unsigned long hwirq_to_check,
> +					    struct cpumask *mask_to_check)
> +{
> +	unsigned long end, hwirq;
> +	const struct cpumask *mask;
> +	unsigned int virq;
> +
> +	hwirq = ctrl * MAX_MSI_IRQS_PER_CTRL;
> +	end = hwirq + MAX_MSI_IRQS_PER_CTRL;
> +	for_each_set_bit_from(hwirq, pp->msi_irq_in_use, end) {
> +		if (hwirq == hwirq_to_check)
> +			continue;
> +		virq = irq_find_mapping(pp->irq_domain, hwirq);
> +		if (!virq)
> +			continue;
> +		mask = irq_get_affinity_mask(virq);

What protects @mask against a concurrent modification?

> +		if (!cpumask_and(mask_to_check, mask, mask_to_check))
> +			return -EINVAL;
> +	}
> +
> +	return 0;
> +}
> +
> +static void dw_pci_update_effective_affinity(struct dw_pcie_rp *pp,
> +					     unsigned long ctrl,
> +					     const struct cpumask *effective_mask,
> +					     unsigned long hwirq_to_check)
> +{
> +	struct irq_desc *desc_downstream;
> +	unsigned int virq_downstream;
> +	unsigned long end, hwirq;
> +
> +	/*
> +	 * Update all the irq_data's effective mask
> +	 * bind to this MSI controller, so the correct
> +	 * affinity would reflect on
> +	 * /proc/irq/XXX/effective_affinity
> +	 */
> +	hwirq = ctrl * MAX_MSI_IRQS_PER_CTRL;
> +	end = hwirq + MAX_MSI_IRQS_PER_CTRL;
> +	for_each_set_bit_from(hwirq, pp->msi_irq_in_use, end) {
> +		virq_downstream = irq_find_mapping(pp->irq_domain, hwirq);
> +		if (!virq_downstream)
> +			continue;
> +		desc_downstream = irq_to_desc(virq_downstream);
> +		irq_data_update_effective_affinity(&desc_downstream->irq_data,
> +						   effective_mask);

Same here.

> +	}
> +}
> +
> +static int dw_pci_msi_set_affinity(struct irq_data *d,
> +				   const struct cpumask *mask, bool force)
> +{
> +	struct dw_pcie_rp *pp = irq_data_get_irq_chip_data(d);
> +	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
> +	int ret;
> +	int virq_parent;
> +	unsigned long hwirq = d->hwirq;
> +	unsigned long flags, ctrl;
> +	struct irq_desc *desc_parent;
> +	const struct cpumask *effective_mask;
> +	cpumask_var_t mask_result;
> +
> +	ctrl = hwirq / MAX_MSI_IRQS_PER_CTRL;
> +	if (!alloc_cpumask_var(&mask_result, GFP_ATOMIC))
> +		return -ENOMEM;

This does not work on an RT-enabled kernel. Allocations with a raw spin
lock held are not possible.

> +	/*
> +	 * Loop through all possible MSI vector to check if the
> +	 * requested one is compatible with all of them
> +	 */
> +	raw_spin_lock_irqsave(&pp->lock, flags);
> +	cpumask_copy(mask_result, mask);
> +	ret = dw_pci_check_mask_compatibility(pp, ctrl, hwirq, mask_result);
> +	if (ret) {
> +		dev_dbg(pci->dev, "Incompatible mask, request %*pbl, irq num %u\n",
> +			cpumask_pr_args(mask), d->irq);
> +		goto unlock;
> +	}
> +
> +	dev_dbg(pci->dev, "Final mask, request %*pbl, irq num %u\n",
> +		cpumask_pr_args(mask_result), d->irq);
> +
> +	virq_parent = pp->msi_irq[ctrl];
> +	desc_parent = irq_to_desc(virq_parent);
> +	ret = desc_parent->irq_data.chip->irq_set_affinity(&desc_parent->irq_data,
> +							   mask_result, force);

Again. Completely unserialized.

Thanks,

        tglx
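
P.S.: One way to get rid of the GFP_ATOMIC allocation would be to
preallocate a per-controller scratch mask at MSI host init time and reuse
it under pp->lock, since the affinity callback runs with the descriptor
raw lock held. Completely untested sketch; the msi_scratch_mask member is
made up for illustration and does not exist in struct dw_pcie_rp today:

	/* Hypothetical new member in struct dw_pcie_rp (pcie-designware.h) */
	cpumask_var_t		msi_scratch_mask;

	/* In dw_pcie_msi_host_init(), where sleeping allocations are fine.
	 * Needs a matching free_cpumask_var() on teardown.
	 */
	if (!alloc_cpumask_var(&pp->msi_scratch_mask, GFP_KERNEL))
		return -ENOMEM;

	/* dw_pci_msi_set_affinity() then works on the preallocated mask
	 * with pp->lock held, so nothing is allocated in the atomic path
	 * and concurrent callers on the same controller are serialized
	 * against each other.
	 */
	raw_spin_lock_irqsave(&pp->lock, flags);
	cpumask_copy(pp->msi_scratch_mask, mask);
	ret = dw_pci_check_mask_compatibility(pp, ctrl, hwirq, pp->msi_scratch_mask);
	...
	raw_spin_unlock_irqrestore(&pp->lock, flags);

That only addresses the allocation. The unserialized access to the other
descriptors' affinity masks is a separate problem.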