aboutsummaryrefslogtreecommitdiff
path: root/block/blk-ioprio.c
blob: 4051fada01f1e87086b95d34a67743efbfd342f0 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
// SPDX-License-Identifier: GPL-2.0
/*
 * Block rq-qos policy for assigning an I/O priority class to requests.
 *
 * Using an rq-qos policy for assigning I/O priority class has two advantages
 * over using the ioprio_set() system call:
 *
 * - This policy is cgroup based so it has all the advantages of cgroups.
 * - While ioprio_set() does not affect page cache writeback I/O, this rq-qos
 *   controller affects page cache writeback I/O for filesystems that support
 *   assiociating a cgroup with writeback I/O. See also
 *   Documentation/admin-guide/cgroup-v2.rst.
 */

#include <linux/blk-mq.h>
#include <linux/blk_types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include "blk-cgroup.h"
#include "blk-ioprio.h"
#include "blk-rq-qos.h"

/**
 * enum prio_policy - I/O priority class policy.
 * @POLICY_NO_CHANGE: (default) do not modify the I/O priority class.
 * @POLICY_PROMOTE_TO_RT: modify no-IOPRIO_CLASS_RT to IOPRIO_CLASS_RT.
 * @POLICY_RESTRICT_TO_BE: modify IOPRIO_CLASS_NONE and IOPRIO_CLASS_RT into
 *		IOPRIO_CLASS_BE.
 * @POLICY_ALL_TO_IDLE: change the I/O priority class into IOPRIO_CLASS_IDLE.
 * @POLICY_NONE_TO_RT: an alias for POLICY_PROMOTE_TO_RT.
 *
 * See also <linux/ioprio.h>.
 */
enum prio_policy {
	POLICY_NO_CHANGE	= 0,
	POLICY_PROMOTE_TO_RT	= 1,
	POLICY_RESTRICT_TO_BE	= 2,
	POLICY_ALL_TO_IDLE	= 3,
	POLICY_NONE_TO_RT	= 4,
};

static const char *policy_name[] = {
	[POLICY_NO_CHANGE]	= "no-change",
	[POLICY_PROMOTE_TO_RT]	= "promote-to-rt",
	[POLICY_RESTRICT_TO_BE]	= "restrict-to-be",
	[POLICY_ALL_TO_IDLE]	= "idle",
	[POLICY_NONE_TO_RT]	= "none-to-rt",
};

static struct blkcg_policy ioprio_policy;

/**
 * struct ioprio_blkg - Per (cgroup, request queue) data.
 * @pd: blkg_policy_data structure.
 */
struct ioprio_blkg {
	struct blkg_policy_data pd;
};

/**
 * struct ioprio_blkcg - Per cgroup data.
 * @cpd: blkcg_policy_data structure.
 * @prio_policy: One of the IOPRIO_CLASS_* values. See also <linux/ioprio.h>.
 */
struct ioprio_blkcg {
	struct blkcg_policy_data cpd;
	enum prio_policy	 prio_policy;
};

static inline struct ioprio_blkg *pd_to_ioprio(struct blkg_policy_data *pd)
{
	return pd ? container_of(pd, struct ioprio_blkg, pd) : NULL;
}

static struct ioprio_blkcg *blkcg_to_ioprio_blkcg(struct blkcg *blkcg)
{
	return container_of(blkcg_to_cpd(blkcg, &ioprio_policy),
			    struct ioprio_blkcg, cpd);
}

static struct ioprio_blkcg *
ioprio_blkcg_from_css(struct cgroup_subsys_state *css)
{
	return blkcg_to_ioprio_blkcg(css_to_blkcg(css));
}

static struct ioprio_blkcg *ioprio_blkcg_from_bio(struct bio *bio)
{
	struct blkg_policy_data *pd = blkg_to_pd(bio->bi_blkg, &ioprio_policy);

	if (!pd)
		return NULL;

	return blkcg_to_ioprio_blkcg(pd->blkg->blkcg);
}

static int ioprio_show_prio_policy(struct seq_file *sf, void *v)
{
	struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(seq_css(sf));

	seq_printf(sf, "%s\n", policy_name[blkcg->prio_policy]);
	return 0;
}

static ssize_t ioprio_set_prio_policy(struct kernfs_open_file *of, char *buf,
				      size_t nbytes, loff_t off)
{
	struct ioprio_blkcg *blkcg = ioprio_blkcg_from_css(of_css(of));
	int ret;

	if (off != 0)
		return -EIO;
	/* kernfs_fop_write_iter() terminates 'buf' with '\0'. */
	ret = sysfs_match_string(policy_name, buf);
	if (ret < 0)
		return ret;
	blkcg->prio_policy = ret;
	return nbytes;
}

static struct blkg_policy_data *
ioprio_alloc_pd(struct gendisk *disk, struct blkcg *blkcg, gfp_t gfp)
{
	struct ioprio_blkg *ioprio_blkg;

	ioprio_blkg = kzalloc(sizeof(*ioprio_blkg), gfp);
	if (!ioprio_blkg)
		return NULL;

	return &ioprio_blkg->pd;
}

static void ioprio_free_pd(struct blkg_policy_data *pd)
{
	struct ioprio_blkg *ioprio_blkg = pd_to_ioprio(pd);

	kfree(ioprio_blkg);
}

static struct blkcg_policy_data *ioprio_alloc_cpd(gfp_t gfp)
{
	struct ioprio_blkcg *blkcg;

	blkcg = kzalloc(sizeof(*blkcg), gfp);
	if (!blkcg)
		return NULL;
	blkcg->prio_policy = POLICY_NO_CHANGE;
	return &blkcg->cpd;
}

static void ioprio_free_cpd(struct blkcg_policy_data *cpd)
{
	struct ioprio_blkcg *blkcg = container_of(cpd, typeof(*blkcg), cpd);

	kfree(blkcg);
}

#define IOPRIO_ATTRS						\
	{							\
		.name		= "prio.class",			\
		.seq_show	= ioprio_show_prio_policy,	\
		.write		= ioprio_set_prio_policy,	\
	},							\
	{ } /* sentinel */

/* cgroup v2 attributes */
static struct cftype ioprio_files[] = {
	IOPRIO_ATTRS
};

/* cgroup v1 attributes */
static struct cftype ioprio_legacy_files[] = {
	IOPRIO_ATTRS
};

static struct blkcg_policy ioprio_policy = {
	.dfl_cftypes	= ioprio_files,
	.legacy_cftypes = ioprio_legacy_files,

	.cpd_alloc_fn	= ioprio_alloc_cpd,
	.cpd_free_fn	= ioprio_free_cpd,

	.pd_alloc_fn	= ioprio_alloc_pd,
	.pd_free_fn	= ioprio_free_pd,
};

void blkcg_set_ioprio(struct bio *bio)
{
	struct ioprio_blkcg *blkcg = ioprio_blkcg_from_bio(bio);
	u16 prio;

	if (!blkcg || blkcg->prio_policy == POLICY_NO_CHANGE)
		return;

	if (blkcg->prio_policy == POLICY_PROMOTE_TO_RT ||
	    blkcg->prio_policy == POLICY_NONE_TO_RT) {
		/*
		 * For RT threads, the default priority level is 4 because
		 * task_nice is 0. By promoting non-RT io-priority to RT-class
		 * and default level 4, those requests that are already
		 * RT-class but need a higher io-priority can use ioprio_set()
		 * to achieve this.
		 */
		if (IOPRIO_PRIO_CLASS(bio->bi_ioprio) != IOPRIO_CLASS_RT)
			bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_RT, 4);
		return;
	}

	/*
	 * Except for IOPRIO_CLASS_NONE, higher I/O priority numbers
	 * correspond to a lower priority. Hence, the max_t() below selects
	 * the lower priority of bi_ioprio and the cgroup I/O priority class.
	 * If the bio I/O priority equals IOPRIO_CLASS_NONE, the cgroup I/O
	 * priority is assigned to the bio.
	 */
	prio = max_t(u16, bio->bi_ioprio,
			IOPRIO_PRIO_VALUE(blkcg->prio_policy, 0));
	if (prio > bio->bi_ioprio)
		bio->bi_ioprio = prio;
}

void blk_ioprio_exit(struct gendisk *disk)
{
	blkcg_deactivate_policy(disk, &ioprio_policy);
}

int blk_ioprio_init(struct gendisk *disk)
{
	return blkcg_activate_policy(disk, &ioprio_policy);
}

static int __init ioprio_init(void)
{
	return blkcg_policy_register(&ioprio_policy);
}

static void __exit ioprio_exit(void)
{
	blkcg_policy_unregister(&ioprio_policy);
}

module_init(ioprio_init);
module_exit(ioprio_exit);