I am trying to write a Linux driver for a PCIe device that should have bus-mastering capabilities. The problem I am encountering is that the buffer, when allocated with dma_alloc_coherent(), is not reachable from a user-space application. A function says more than a thousand words, so I will post the most important pieces of code:
//Private driver structure struct pcichardev { struct bar_t bar; dev_t major; struct cdev cdev; void * buffer; dma_addr_t buffer_phys; };
static int pci_probe(struct pci_dev *pdev, const struct pci_device_id *id){ printk("Entering pci-probe\n"); int errno = 0; int num_mem_bars; struct pcichardev *locpcichar; struct device *dev; dev_t dev_num; locpcichar = kmalloc(sizeof(struct pcichardev), GFP_KERNEL); //should be right choice if(!locpcichar){ errno = -ENOMEM; goto failure_kmalloc; } errno = pci_enable_device_mem(pdev); if(errno){ goto failure_pci_enable; } pci_set_master(pdev); //enable bus mastering // enable 64 bit DMA dma_set_mask_and_coherent(&(pdev->dev), DMA_BIT_MASK(64)); //allocate memory for bus mastring operations void *buffer; dma_addr_t buffer_handle; size_t buffer_size = 4096; // Allocate a 4KB buffer, adjust size as needed // Allocate DMA-coherent memory for bus-mastering transfers buffer = dma_alloc_coherent(&(pdev->dev), buffer_size, &buffer_handle, GFP_USER); if (!buffer) { dev_err(&pdev->dev, "Failed to allocate DMA memory\n"); return -ENOMEM; } printk("address of buffer is %p\n", (void *) buffer); printk("buffer handle is : %llx\n", buffer_handle); locpcichar->buffer = buffer; locpcichar->buffer_phys = buffer_handle; printk("DMA buffer allocated at virtual address: %p, physical address: 0x%llx, handle is : 0x%llx\n", buffer, virt_to_phys(buffer), buffer_handle); //request only the bars containing a memory region num_mem_bars = pci_select_bars(pdev, IORESOURCE_MEM); errno = pci_request_selected_regions(pdev, num_mem_bars, "pci-char"); if(errno){ goto failure_pci_regions; } //Memory map BARs to MMIO regions if(num_mem_bars){ locpcichar->bar.addr = ioremap(pci_resource_start(pdev,0), pci_resource_len(pdev, 0)); if(IS_ERR(locpcichar->bar.addr)){ errno = PTR_ERR(locpcichar->bar.addr); }else{ locpcichar->bar.len = pci_resource_len(pdev, 0); } }else{ locpcichar->bar.addr = NULL; locpcichar->bar.len = 0; } //if there was an error, then perform unmapping if(errno){ if(locpcichar->bar.len){ iounmap(locpcichar->bar.addr); } goto failure_ioremap; } //get the minor number range of the device, 
for now, it should all be zero errno = alloc_chrdev_region(&dev_num, 0, 1, "xpci_chrdev"); if(errno){ goto failure_alloc_chrdev_region; } locpcichar->major = MAJOR(dev_num); //connect this chrdev instance with all previously defined character operations cdev_init(&locpcichar->cdev, &fops); locpcichar->cdev.owner = THIS_MODULE; //add cdev with major/min number, quite useless, as we only have a BAR, and thus a minor number errno = cdev_add(&locpcichar->cdev, MKDEV(locpcichar->major, 0), 1); if(errno){ goto failure_cdev_add; } //create /dev nodes with udev if(locpcichar->bar.len){ dev = device_create(pchar_class, &pdev->dev, MKDEV(locpcichar->major, 0), NULL, "b%xd%xf%x_bar%d", pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), 0); if(IS_ERR(dev)){ errno = PTR_ERR(dev); } } printk("PCI-PROBE : Bar at address %p, with length : %ld \n", (void *) locpcichar->bar.addr, (unsigned long) locpcichar->bar.len); if(errno){ if(locpcichar->bar.len){ device_destroy(pchar_class, MKDEV(locpcichar->major, 0)); } goto failure_device_create; } pci_set_drvdata(pdev, locpcichar); dev_info(&pdev->dev, "claimed by xpci_chrdev\n"); return 0; failure_device_create: printk("failure device create\n"); cdev_del(&locpcichar->cdev); failure_cdev_add: printk("failure cdev add\n"); unregister_chrdev_region(MKDEV(locpcichar->major, 0), 1); failure_alloc_chrdev_region: printk("failure alloc cdev region\n"); if (locpcichar->bar.len) iounmap(locpcichar->bar.addr); failure_ioremap: printk("failure ioremap\n"); pci_release_selected_regions(pdev, pci_select_bars(pdev, IORESOURCE_MEM)); failure_pci_regions: printk("failure pci regions\n"); pci_disable_device(pdev); failure_pci_enable: printk("failure pci enable\n"); kfree(locpcichar); failure_pci_mastering: printk("failure in setting pci dev to bus-mastering\n"); kfree(locpcichar); failure_kmalloc: printk("failure kmalloc\n"); return errno; }
static int xdev_mmap(struct file *file, struct vm_area_struct *vma) { vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP; //vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); struct pcichardev *locpcichar = file->private_data; printk("In mmap\n"); printk("vma size : %d\n", vma->vm_end - vma->vm_start); printk("buffer_phys @ 0x%llx", locpcichar->buffer_phys); printk("virt_to_phys delivers 0x%llx", virt_to_phys(locpcichar->buffer_phys)); // Map the DMA-coherent memory to user-space int ret = remap_pfn_range(vma, vma->vm_start, locpcichar->buffer_phys >> PAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot); if (ret) { pr_err("Failed to mmap DMA buffer to user-space\n"); return -EIO; } return 0;}
So much for the driver. What I am trying to do in the user-space application is the following:
// Step 1: Open the character device fd = open(DEVICE_PATH, O_RDWR | O_SYNC); if (fd < 0) { perror("Failed to open the device"); return EXIT_FAILURE; } // Step 2: Map the DMA buffer to user-space dma_buffer = mmap(NULL, BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); printf("MMAP delivered the following address : %p\n", dma_buffer); if (dma_buffer == MAP_FAILED) { perror("Failed to mmap the device"); close(fd); return EXIT_FAILURE; } // Step 3: Initialize the buffer with test data data = (uint32_t *) dma_buffer; for (i = 0; i < BUFFER_SIZE / sizeof(uint32_t); i++) { data[i] = i; } // Step 4: Verify data in the DMA buffer after the transfer printf("Verifying data in the DMA buffer...\n"); for (i = 0; i < BUFFER_SIZE / sizeof(uint32_t); i++) { printf("data at pos %i --> 0x%x\n",i, data[i] ); if (data[i] != i) { fprintf(stderr, "Data mismatch at index %d: expected 0x%X, got 0x%X\n", i, i, data[i]); } }
Incredibly enough, making the following changes actually makes the application work:
static int xdev_mmap(struct file *file, struct vm_area_struct *vma) { vma->vm_flags |= VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | VM_DONTDUMP | VM_PFNMAP; //vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); struct pcichardev *locpcichar = file->private_data; printk("In mmap\n"); printk("vma size : %d\n", vma->vm_end - vma->vm_start); printk("buffer_phys @ 0x%llx", locpcichar->buffer_phys); printk("virt_to_phys delivers 0x%llx", virt_to_phys(locpcichar->buffer_phys)); // Map the DMA-coherent memory to user-space int ret = remap_pfn_range(vma, vma->vm_start, virt_to_phys(locpcichar->buffer) >> PAGE_SHIFT, vma->vm_end - vma->vm_start, vma->vm_page_prot); if (ret) { pr_err("Failed to mmap DMA buffer to user-space\n"); return -EIO; } return 0;}
int errno = 0; int num_mem_bars; struct pcichardev *locpcichar; struct device *dev; dev_t dev_num; locpcichar = kmalloc(sizeof(struct pcichardev), GFP_KERNEL); //should be right choice if(!locpcichar){ errno = -ENOMEM; goto failure_kmalloc; } errno = pci_enable_device_mem(pdev); if(errno){ goto failure_pci_enable; } pci_set_master(pdev); //enable bus mastering // enable 64 bit DMA dma_set_mask_and_coherent(&(pdev->dev), DMA_BIT_MASK(64)); //allocate memory for bus mastring operations void *buffer; dma_addr_t buffer_handle; size_t buffer_size = 4096; // Allocate a 4KB buffer, adjust size as needed buffer = kmalloc(buffer_size, GFP_USER); if (!buffer) { dev_err(&pdev->dev, "Failed to allocate DMA memory\n"); return -ENOMEM; } printk("address of buffer is %p\n", (void *) buffer); printk("buffer handle is : %llx\n", buffer_handle); buffer_handle = dma_map_single(&pdev->dev, buffer, buffer_size, DMA_BIDIRECTIONAL); locpcichar->buffer = buffer; locpcichar->buffer_phys = buffer_handle; printk("DMA buffer allocated at virtual address: %p, physical address: 0x%llx, handle is : 0x%llx\n", buffer, virt_to_phys(buffer), buffer_handle);
What am I doing wrong here? Sorry for the dumb question...