Skip to content
Advertisement

Interrupt Handler Stops Working After Kernel Module Reload (Xilinx FPGA / PCIe)

I am currently working on a PCI driver for the Xilinx Kintex 7 board using the Xilinx PCI IP core (AXI Memory Mapped to PCIe). The problem is that the interrupt handler stops working when I reload the kernel module. In more detail:

  1. Fresh boot of my machine
  2. Load the kernel module and monitor the kernel messages with dmesg
  3. /proc/interrupts shows the expected interrupt ids
  4. I trigger the HW interrupt and everything works as expected; I can see the interrupt handler working.
  5. rmmod my_module
  6. /proc/interrupts removed the interrupt ids as expected
  7. insmod my_module and trigger interrupt
  8. Now the interrupt handler is silent and the counter in /proc/interrupts does not increase

If I reboot my machine, everything works again. Since I do not have to restart the FPGA, I assume that I am doing something wrong in the kernel module and that it is probably not a hardware problem.

I’ve already used /sys/bus/pci/devices/.../reset, /sys/bus/pci/devices/.../remove and /sys/bus/pci/rescan to try to reach a state equivalent to that of a freshly booted machine, but nothing worked.

Relevant module code:

// PCI identity of the board (lspci reports vendor 0x10EE as Xilinx Corporation).
#define VENDOR_ID 0x10EE
#define DEVICE_ID 0x7024

// Character-device bookkeeping filled in by mod_init() and torn down by mod_exit().
static dev_t pci_dev_number;
static struct cdev *driver_object;
static struct class *pci_class;
static struct device *pci_prc;
static struct device *pci_irq_0;
static struct device *pci_irq_1;

// Number of MSI vectors to request, followed by the Linux IRQ numbers
// obtained for vectors 0 and 1 (-1 while unassigned).
static int msi_vec_num = 2;
static int msi_0 = -1;
static int msi_1 = -1;

// Wait queues woken by the two interrupt handlers; used for poll and select.
static DECLARE_WAIT_QUEUE_HEAD(queue_vs0);
static DECLARE_WAIT_QUEUE_HEAD(queue_vs1);

// Interrupt handler registered for MSI vector 0.
// Logs the IRQ number and wakes every task sleeping on queue_vs0.
// dev_id is not used. Always reports the interrupt as handled.
static irqreturn_t pci_isr_0(int irq, void *dev_id) {
  // The message must end in a real newline so the log line is terminated.
  printk(KERN_NOTICE "codec IRQ: interrupt handler 0. IRQ: %d\n", irq);
  wake_up_interruptible(&queue_vs0);
  return IRQ_HANDLED;
}

// Interrupt handler registered for MSI vector 1.
// Logs the IRQ number and wakes every task sleeping on queue_vs1.
// dev_id is not used. Always reports the interrupt as handled.
static irqreturn_t pci_isr_1(int irq, void *dev_id) {
  // The message must end in a real newline so the log line is terminated.
  printk(KERN_NOTICE "codec IRQ: interrupt handler 1. IRQ: %d\n", irq);
  wake_up_interruptible(&queue_vs1);
  return IRQ_HANDLED;
}

// Kernel virtual addresses of the mapped BARs, indexed by BAR number.
// Static storage starts out zeroed, so every entry is NULL until mapped.
static void *bars[PCIE_BARS];

static int device_init(struct pci_dev * pdev, const struct pci_device_id * id) {
  int i = 0; // loop var
  if (pci_enable_device(pdev))
    return -EIO;

  // Request memory regions for bar 0 to 2
  for (i = 0; i < PCIE_BARS; i++) {
    if (pci_request_region(pdev, i, "codec_pci") != 0) {
      dev_err( & pdev - > dev, "Bar %d - I/O address conflict for device "%s"n", i, pdev - > dev.kobj.name);
      return -EIO;
    }
  }

  // DEBUG: Check if we are in memory space (which we should) or io space
  if ((pci_resource_flags(pdev, 0) & IORESOURCE_IO)) {
    printk(KERN_NOTICE "codec INIT: in io spacen");
  } else if ((pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
    printk(KERN_NOTICE "codec INIT: in mem_spacen");
  }

  // This request enables MSI_enable in the hardware
  msi_vec_num = pci_alloc_irq_vectors(pdev, 1, msi_vec_num, PCI_IRQ_MSI);

  // msi_N will contain the IRQ number - see /proc/interrupts
  msi_0 = pci_irq_vector(pdev, 0);
  msi_1 = pci_irq_vector(pdev, 1);
  printk(KERN_NOTICE "codec INIT: nvec: %dn", msi_vec_num);
  printk(KERN_NOTICE "codec INIT: msi_0: %dn", msi_0);
  printk(KERN_NOTICE "codec INIT: msi_1: %dn", msi_1);

  if (request_irq(msi_0, pci_isr_0, IRQF_SHARED, "codec_pci", pdev)) {
    dev_err( & pdev - > dev, "codec INIT: IRQ MSI %d not free.n", msi_0);
    goto cleanup;
  };
  if (request_irq(msi_1, pci_isr_1, IRQF_SHARED, "codec_pci", pdev)) {
    dev_err( & pdev - > dev, "codec INIT: IRQ MSI %d not free.n", msi_1);
    goto cleanup;
  };

  for (i = 0; i < PCIE_BARS; i++) {
    // Last parameter is the address space/length of each bar. Defined in the PCIe core.
    bars[i] = pci_iomap(pdev, i, pci_resource_len(pdev, i));
    if (bars[i] == NULL) {
      printk(KERN_ERR "codec INIT: bar %d allocation failedn", i);
      goto cleanup;
    }
    printk(KERN_NOTICE "codec INIT: bar %d pointer: %pn", i, bars[i]);
  }

  printk(KERN_NOTICE "codec INIT: loadedn");

  return 0;
cleanup:
    for (i = 0; i < PCIE_BARS; i++) {
      if (bars[i] != NULL)
        pci_iounmap(pdev, bars[i]);
      pci_release_region(pdev, i);
    }
    
  return -EIO;
}

// Remove callback: frees the interrupt handlers and MSI vectors, unmaps and
// releases the BARs, and disables the device.
//
// The module-level state (msi_0, msi_1, bars[]) is reset as it is released
// so that nothing stale is left behind for a later probe of the device.
static void device_deinit(struct pci_dev *pdev) {
  int i;

  if (msi_0 >= 0) {
    free_irq(msi_0, pdev);
    msi_0 = -1;
  }

  if (msi_1 >= 0) {
    free_irq(msi_1, pdev);
    msi_1 = -1;
  }

  pci_free_irq_vectors(pdev);

  // Drop each mapping before giving up ownership of the region it covers.
  for (i = 0; i < PCIE_BARS; i++) {
    if (bars[i] != NULL) {
      pci_iounmap(pdev, bars[i]);
      bars[i] = NULL;
    }
    pci_release_region(pdev, i);
  }

  // NOTE: disabling the device also turns off PCI bus mastering; the next
  // probe has to turn it back on with pci_set_master() for MSIs to arrive.
  pci_disable_device(pdev);
}

// File operations not in this snipped
static struct file_operations fops = {
  .owner = THIS_MODULE,
  .open = device_open,
  .read = device_read,
  .write = device_write,
  .poll = device_poll
};

// Devices this driver binds to: VENDOR_ID:DEVICE_ID with any subsystem IDs.
// Fields that are not named (class, class_mask, driver_data) are zero.
static struct pci_device_id pci_drv_tbl[] = {
  {
    .vendor    = VENDOR_ID,
    .device    = DEVICE_ID,
    .subvendor = PCI_ANY_ID,
    .subdevice = PCI_ANY_ID,
  },
  { 0 } // all-zero entry terminates the table
};

// Driver descriptor registered in mod_init(): device_init() is the probe
// callback and device_deinit() the remove callback for pci_drv_tbl matches.
static struct pci_driver pci_drv = {
  .name     = "codec_pci",
  .id_table = pci_drv_tbl,
  .probe    = device_init,
  .remove   = device_deinit,
};

// Module entry point: reserves the character-device numbers, registers the
// cdev, creates the class with its three device nodes (codec_prc,
// codec_irq_0, codec_irq_1) and finally registers the PCI driver.
//
// Returns 0 on success or a negative errno. On failure everything that was
// set up so far is torn down again in reverse order.
static int __init mod_init(void) {
  int i;
  int ret;

  ret = alloc_chrdev_region(&pci_dev_number, 0, MAX_DEVICES, "codec_pci");
  if (ret < 0)
    return ret;

  driver_object = cdev_alloc();
  if (driver_object == NULL) {
    ret = -ENOMEM;
    goto free_dev_number;
  }
  driver_object->owner = THIS_MODULE;
  driver_object->ops = &fops;
  ret = cdev_add(driver_object, pci_dev_number, MAX_DEVICES);
  if (ret) {
    // The cdev was never added, so only the reference taken by
    // cdev_alloc() has to be dropped.
    kobject_put(&driver_object->kobj);
    goto free_dev_number;
  }

  pci_class = class_create(THIS_MODULE, "codec_pci");
  if (IS_ERR(pci_class)) {
    pr_err("codec MOD_INIT: no udev support available\n");
    ret = PTR_ERR(pci_class);
    goto del_cdev;
  }

  pci_prc = device_create(pci_class, NULL, MKDEV(MAJOR(pci_dev_number), MINOR(pci_dev_number) + 0), NULL, "%s", "codec_prc");
  if (IS_ERR(pci_prc)) {
    ret = PTR_ERR(pci_prc);
    goto destroy_devices;
  }
  pci_irq_0 = device_create(pci_class, NULL, MKDEV(MAJOR(pci_dev_number), MINOR(pci_dev_number) + 1), NULL, "codec_irq_%d", 0);
  if (IS_ERR(pci_irq_0)) {
    ret = PTR_ERR(pci_irq_0);
    goto destroy_devices;
  }
  pci_irq_1 = device_create(pci_class, NULL, MKDEV(MAJOR(pci_dev_number), MINOR(pci_dev_number) + 2), NULL, "codec_irq_%d", 1);
  if (IS_ERR(pci_irq_1)) {
    ret = PTR_ERR(pci_irq_1);
    goto destroy_devices;
  }

  ret = pci_register_driver(&pci_drv);
  if (ret < 0)
    goto destroy_devices;

  return 0;

destroy_devices:
  // pci_dev_number is a complete dev_t, so it has to be split into major
  // and minor before building the per-node number. device_destroy() does
  // nothing for a number that has no device in the class.
  for (i = 0; i < MAX_DEVICES; i++)
    device_destroy(pci_class, MKDEV(MAJOR(pci_dev_number), MINOR(pci_dev_number) + i));
  class_destroy(pci_class);
del_cdev:
  cdev_del(driver_object);
free_dev_number:
  unregister_chrdev_region(pci_dev_number, MAX_DEVICES);
  return ret;
}

// Module exit point: unregisters the PCI driver, then removes the device
// nodes, the class, the cdev and the reserved device numbers.
static void __exit mod_exit(void) {
  int i;

  pci_unregister_driver(&pci_drv);

  // Remove the nodes by device number rather than through the stored
  // pointers: device_destroy() looks the device up itself and does nothing
  // if it is absent. pci_dev_number is a complete dev_t, so it is split
  // into major and minor before the per-node number is built.
  for (i = 0; i < MAX_DEVICES; i++)
    device_destroy(pci_class, MKDEV(MAJOR(pci_dev_number), MINOR(pci_dev_number) + i));

  class_destroy(pci_class);
  cdev_del(driver_object);
  unregister_chrdev_region(pci_dev_number, MAX_DEVICES);
}

// Register mod_init() and mod_exit() as the module's load and unload hooks.
module_init(mod_init);
module_exit(mod_exit);

Error handling routines could be better, but they don’t get triggered anyway.

Advertisement

Answer

I guess I found the cause of my problem. I took a look at the PCI configuration space while executing each of the steps of my original post. The configuration space when interrupts are working:

# lspci -xxx | grep Xilinx
23:00.0 Memory controller: Xilinx Corporation Device 7024
00: ee 10 24 70 *07* 04 10 00 00 00 80 05 10 00 00 00
. . .

And when it’s broken:

# lspci -xxx | grep Xilinx
23:00.0 Memory controller: Xilinx Corporation Device 7024
00: ee 10 24 70 *03* 04 10 00 00 00 80 05 10 00 00 00
. . .

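What I found is that the command register value changes after the kernel module reload (marked with *). When the interrupt works the command register value is 0x0407, after the module reload it is 0x0403. Why? I don’t know. It is probably just the way the Xilinx AXI Memory Mapped to PCIe core is implemented. (Note: the bit that differs, 0x0004, is the Bus Master Enable bit of the PCI command register. MSIs are delivered as memory writes issued by the device, so they cannot arrive while this bit is cleared.)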

Anyway, you can set the values of the PCI configuration space using setpci(8).

The wanted value of the command register is 0407 so you execute:

# setpci -d <vendor_id>:<device_id> command=0407

#read back to check if it worked
# sudo setpci -d <vendor_id>:<device_id> command
0407

Afterwards the interrupts are working again, and I do not need to reboot.

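Within the kernel module you can, for example, use pci_write_config_word(...) to set the command register to the required value (it is 16 bits wide, so the byte accessor is not sufficient); other registers can be set in the same way with the accessor matching their width. For the Bus Master Enable bit specifically, calling pci_set_master(pdev) after pci_enable_device(pdev) sets it for you. The corresponding functions to access the configuration space can be found here: Linux Device Drivers – Accessing the Configuration Space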

User contributions licensed under: CC BY-SA
3 people found this helpful
Advertisement