Skip to content

Commit

Permalink
Merge tag 'cxl-fixes-for-5.12-rc8' of git://git.kernel.org/pub/scm/li…
Browse files Browse the repository at this point in the history
…nux/kernel/git/cxl/cxl

Pull CXL memory class fixes from Dan Williams:
 "A collection of fixes for the CXL memory class driver introduced in
  this release cycle.

  The driver was primarily developed on a work-in-progress QEMU
  emulation of the interface and we have since found a couple places
  where it hid spec compliance bugs in the driver, or had a spec
  implementation bug itself.

  The biggest change here is replacing a percpu_ref with an rwsem to
  cleanup a couple bugs in the error unwind path during ioctl device
  init. Lastly there were some minor cleanups to not export the
  power-management sysfs-ABI for the ioctl device, use the proper sysfs
  helper for emitting values, and prevent subtle bugs as new
  administration commands are added to the supported list.

  The bulk of it has appeared in -next save for the top commit which was
  found today and validated on a fixed-up QEMU model.

  Summary:

   - Fix support for CXL memory devices with registers offset from the
     BAR base.

   - Fix the reporting of device capacity.

   - Fix the driver commands list definition to be disconnected from the
     UAPI command list.

   - Replace percpu_ref with rwsem to fix initialization error path.

   - Fix leaks in the driver initialization error path.

   - Drop the power/ directory from CXL device sysfs.

   - Use the recommended sysfs helper for attribute 'show'
     implementations"

* tag 'cxl-fixes-for-5.12-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl:
  cxl/mem: Fix memory device capacity probing
  cxl/mem: Fix register block offset calculation
  cxl/mem: Force array size of mem_commands[] to CXL_MEM_COMMAND_ID_MAX
  cxl/mem: Disable cxl device power management
  cxl/mem: Do not rely on device_add() side effects for dev_set_name() failures
  cxl/mem: Fix synchronization mechanism for device removal vs ioctl operations
  cxl/mem: Use sysfs_emit() for attribute show routines
  • Loading branch information
torvalds committed Apr 17, 2021
2 parents fdb5d6c + fae8817 commit 7c22677
Showing 1 changed file with 89 additions and 63 deletions.
152 changes: 89 additions & 63 deletions drivers/cxl/mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include <linux/security.h>
#include <linux/debugfs.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/cdev.h>
#include <linux/idr.h>
Expand Down Expand Up @@ -96,21 +97,18 @@ struct mbox_cmd {
* @dev: driver core device object
* @cdev: char dev core object for ioctl operations
* @cxlm: pointer to the parent device driver data
* @ops_active: active user of @cxlm in ops handlers
* @ops_dead: completion when all @cxlm ops users have exited
* @id: id number of this memdev instance.
*/
struct cxl_memdev {
struct device dev;
struct cdev cdev;
struct cxl_mem *cxlm;
struct percpu_ref ops_active;
struct completion ops_dead;
int id;
};

static int cxl_mem_major;
static DEFINE_IDA(cxl_memdev_ida);
static DECLARE_RWSEM(cxl_memdev_rwsem);
static struct dentry *cxl_debugfs;
static bool cxl_raw_allow_all;

Expand Down Expand Up @@ -169,7 +167,7 @@ struct cxl_mem_command {
* table will be validated against the user's input. For example, if size_in is
* 0, and the user passed in 1, it is an error.
*/
static struct cxl_mem_command mem_commands[] = {
static struct cxl_mem_command mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE),
#ifdef CONFIG_CXL_MEM_RAW_COMMANDS
CXL_CMD(RAW, ~0, ~0, 0),
Expand Down Expand Up @@ -776,26 +774,43 @@ static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
unsigned long arg)
{
struct cxl_memdev *cxlmd;
struct inode *inode;
int rc = -ENOTTY;
struct cxl_memdev *cxlmd = file->private_data;
int rc = -ENXIO;

inode = file_inode(file);
cxlmd = container_of(inode->i_cdev, typeof(*cxlmd), cdev);
down_read(&cxl_memdev_rwsem);
if (cxlmd->cxlm)
rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
up_read(&cxl_memdev_rwsem);

if (!percpu_ref_tryget_live(&cxlmd->ops_active))
return -ENXIO;
return rc;
}

rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
static int cxl_memdev_open(struct inode *inode, struct file *file)
{
struct cxl_memdev *cxlmd =
container_of(inode->i_cdev, typeof(*cxlmd), cdev);

percpu_ref_put(&cxlmd->ops_active);
get_device(&cxlmd->dev);
file->private_data = cxlmd;

return rc;
return 0;
}

static int cxl_memdev_release_file(struct inode *inode, struct file *file)
{
struct cxl_memdev *cxlmd =
container_of(inode->i_cdev, typeof(*cxlmd), cdev);

put_device(&cxlmd->dev);

return 0;
}

static const struct file_operations cxl_memdev_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = cxl_memdev_ioctl,
.open = cxl_memdev_open,
.release = cxl_memdev_release_file,
.compat_ioctl = compat_ptr_ioctl,
.llseek = noop_llseek,
};
Expand Down Expand Up @@ -984,7 +999,7 @@ static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev, u32 reg_lo,
return NULL;
}

offset = ((u64)reg_hi << 32) | FIELD_GET(CXL_REGLOC_ADDR_MASK, reg_lo);
offset = ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK);
bar = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);

/* Basic sanity check that BAR is big enough */
Expand Down Expand Up @@ -1049,7 +1064,6 @@ static void cxl_memdev_release(struct device *dev)
{
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);

percpu_ref_exit(&cxlmd->ops_active);
ida_free(&cxl_memdev_ida, cxlmd->id);
kfree(cxlmd);
}
Expand All @@ -1066,7 +1080,7 @@ static ssize_t firmware_version_show(struct device *dev,
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_mem *cxlm = cxlmd->cxlm;

return sprintf(buf, "%.16s\n", cxlm->firmware_version);
return sysfs_emit(buf, "%.16s\n", cxlm->firmware_version);
}
static DEVICE_ATTR_RO(firmware_version);

Expand All @@ -1076,7 +1090,7 @@ static ssize_t payload_max_show(struct device *dev,
struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
struct cxl_mem *cxlm = cxlmd->cxlm;

return sprintf(buf, "%zu\n", cxlm->payload_size);
return sysfs_emit(buf, "%zu\n", cxlm->payload_size);
}
static DEVICE_ATTR_RO(payload_max);

Expand All @@ -1087,7 +1101,7 @@ static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
struct cxl_mem *cxlm = cxlmd->cxlm;
unsigned long long len = range_len(&cxlm->ram_range);

return sprintf(buf, "%#llx\n", len);
return sysfs_emit(buf, "%#llx\n", len);
}

static struct device_attribute dev_attr_ram_size =
Expand All @@ -1100,7 +1114,7 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
struct cxl_mem *cxlm = cxlmd->cxlm;
unsigned long long len = range_len(&cxlm->pmem_range);

return sprintf(buf, "%#llx\n", len);
return sysfs_emit(buf, "%#llx\n", len);
}

static struct device_attribute dev_attr_pmem_size =
Expand Down Expand Up @@ -1150,27 +1164,24 @@ static const struct device_type cxl_memdev_type = {
.groups = cxl_memdev_attribute_groups,
};

static void cxlmdev_unregister(void *_cxlmd)
static void cxl_memdev_shutdown(struct cxl_memdev *cxlmd)
{
struct cxl_memdev *cxlmd = _cxlmd;
struct device *dev = &cxlmd->dev;

percpu_ref_kill(&cxlmd->ops_active);
cdev_device_del(&cxlmd->cdev, dev);
wait_for_completion(&cxlmd->ops_dead);
down_write(&cxl_memdev_rwsem);
cxlmd->cxlm = NULL;
put_device(dev);
up_write(&cxl_memdev_rwsem);
}

static void cxlmdev_ops_active_release(struct percpu_ref *ref)
static void cxl_memdev_unregister(void *_cxlmd)
{
struct cxl_memdev *cxlmd =
container_of(ref, typeof(*cxlmd), ops_active);
struct cxl_memdev *cxlmd = _cxlmd;
struct device *dev = &cxlmd->dev;

complete(&cxlmd->ops_dead);
cdev_device_del(&cxlmd->cdev, dev);
cxl_memdev_shutdown(cxlmd);
put_device(dev);
}

static int cxl_mem_add_memdev(struct cxl_mem *cxlm)
static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm)
{
struct pci_dev *pdev = cxlm->pdev;
struct cxl_memdev *cxlmd;
Expand All @@ -1180,22 +1191,11 @@ static int cxl_mem_add_memdev(struct cxl_mem *cxlm)

cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
if (!cxlmd)
return -ENOMEM;
init_completion(&cxlmd->ops_dead);

/*
* @cxlm is deallocated when the driver unbinds so operations
* that are using it need to hold a live reference.
*/
cxlmd->cxlm = cxlm;
rc = percpu_ref_init(&cxlmd->ops_active, cxlmdev_ops_active_release, 0,
GFP_KERNEL);
if (rc)
goto err_ref;
return ERR_PTR(-ENOMEM);

rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL);
if (rc < 0)
goto err_id;
goto err;
cxlmd->id = rc;

dev = &cxlmd->dev;
Expand All @@ -1204,30 +1204,54 @@ static int cxl_mem_add_memdev(struct cxl_mem *cxlm)
dev->bus = &cxl_bus_type;
dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
dev->type = &cxl_memdev_type;
dev_set_name(dev, "mem%d", cxlmd->id);
device_set_pm_not_required(dev);

cdev = &cxlmd->cdev;
cdev_init(cdev, &cxl_memdev_fops);
return cxlmd;

err:
kfree(cxlmd);
return ERR_PTR(rc);
}

static int cxl_mem_add_memdev(struct cxl_mem *cxlm)
{
struct cxl_memdev *cxlmd;
struct device *dev;
struct cdev *cdev;
int rc;

cxlmd = cxl_memdev_alloc(cxlm);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);

dev = &cxlmd->dev;
rc = dev_set_name(dev, "mem%d", cxlmd->id);
if (rc)
goto err;

/*
* Activate ioctl operations, no cxl_memdev_rwsem manipulation
* needed as this is ordered with cdev_add() publishing the device.
*/
cxlmd->cxlm = cxlm;

cdev = &cxlmd->cdev;
rc = cdev_device_add(cdev, dev);
if (rc)
goto err_add;
goto err;

return devm_add_action_or_reset(dev->parent, cxlmdev_unregister, cxlmd);
return devm_add_action_or_reset(dev->parent, cxl_memdev_unregister,
cxlmd);

err_add:
ida_free(&cxl_memdev_ida, cxlmd->id);
err_id:
err:
/*
* Theoretically userspace could have already entered the fops,
* so flush ops_active.
* The cdev was briefly live, shutdown any ioctl operations that
* saw that state.
*/
percpu_ref_kill(&cxlmd->ops_active);
wait_for_completion(&cxlmd->ops_dead);
percpu_ref_exit(&cxlmd->ops_active);
err_ref:
kfree(cxlmd);

cxl_memdev_shutdown(cxlmd);
put_device(dev);
return rc;
}

Expand Down Expand Up @@ -1396,6 +1420,7 @@ static int cxl_mem_enumerate_cmds(struct cxl_mem *cxlm)
*/
static int cxl_mem_identify(struct cxl_mem *cxlm)
{
/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
struct cxl_mbox_identify {
char fw_revision[0x10];
__le64 total_capacity;
Expand Down Expand Up @@ -1424,10 +1449,11 @@ static int cxl_mem_identify(struct cxl_mem *cxlm)
* For now, only the capacity is exported in sysfs
*/
cxlm->ram_range.start = 0;
cxlm->ram_range.end = le64_to_cpu(id.volatile_capacity) - 1;
cxlm->ram_range.end = le64_to_cpu(id.volatile_capacity) * SZ_256M - 1;

cxlm->pmem_range.start = 0;
cxlm->pmem_range.end = le64_to_cpu(id.persistent_capacity) - 1;
cxlm->pmem_range.end =
le64_to_cpu(id.persistent_capacity) * SZ_256M - 1;

memcpy(cxlm->firmware_version, id.fw_revision, sizeof(id.fw_revision));

Expand Down

0 comments on commit 7c22677

Please sign in to comment.