/*
 * V(R) I/O Scheduler
 *
 * Copyright (C) 2007 Aaron Carroll
 *
 *
 * The algorithm:
 *
 * The next request is decided based on its distance from the last
 * request, with a multiplicative penalty of `rev_penalty' applied
 * for reversing the head direction. A rev_penalty of 1 means SSTF
 * behaviour. As this variable is increased, the algorithm approaches
 * pure SCAN. Setting rev_penalty to 0 forces SCAN.
 *
 * Async and sync requests are not treated separately. Instead we
 * rely on deadlines to ensure fairness.
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/version.h>

enum vr_data_dir {
	ASYNC,
	SYNC,
};

enum vr_head_dir {
	FORWARD,
	BACKWARD,
};

static const int sync_expire  = 1000;	/* max time before a sync is submitted. */
static const int async_expire = 5000;	/* ditto for async, these limits are SOFT! */
static const int fifo_batch   = 1;
static const int rev_penalty  = 1;	/* penalty for reversing head direction */

struct vr_data {
	struct rb_root sort_list;
	struct list_head fifo_list[2];

	struct request *next_rq;
	struct request *prev_rq;

	unsigned int nbatched;
	sector_t last_sector;		/* head position */
	int head_dir;

	/* tunables */
	int fifo_expire[2];
	int fifo_batch;
	int rev_penalty;
};

static void vr_move_request(struct vr_data *, struct request *);

static inline struct vr_data *
vr_get_data(struct request_queue *q)
{
	return q->elevator->elevator_data;
}

static void
vr_add_rq_rb(struct vr_data *vd, struct request *rq)
{
	elv_rb_add(&vd->sort_list, rq);

	if (blk_rq_pos(rq) >= vd->last_sector) {
		if (!vd->next_rq || blk_rq_pos(vd->next_rq) > blk_rq_pos(rq))
			vd->next_rq = rq;
	} else {
		if (!vd->prev_rq || blk_rq_pos(vd->prev_rq) < blk_rq_pos(rq))
			vd->prev_rq = rq;
	}

	BUG_ON(vd->next_rq && vd->next_rq == vd->prev_rq);
	BUG_ON(vd->next_rq && vd->prev_rq &&
	       blk_rq_pos(vd->next_rq) < blk_rq_pos(vd->prev_rq));
}

static void
vr_del_rq_rb(struct vr_data *vd, struct request *rq)
{
	/*
	 * We might be deleting our cached next request.
	 * If so, find its successor.
	 */
	if (vd->next_rq == rq)
		vd->next_rq = elv_rb_latter_request(NULL, rq);
	else if (vd->prev_rq == rq)
		vd->prev_rq = elv_rb_former_request(NULL, rq);

	BUG_ON(vd->next_rq && vd->next_rq == vd->prev_rq);
	BUG_ON(vd->next_rq && vd->prev_rq &&
	       blk_rq_pos(vd->next_rq) < blk_rq_pos(vd->prev_rq));

	elv_rb_del(&vd->sort_list, rq);
}

/*
 * add rq to rbtree and fifo
 */
static void
vr_add_request(struct request_queue *q, struct request *rq)
{
	struct vr_data *vd = vr_get_data(q);
	const int dir = rq_is_sync(rq);

	vr_add_rq_rb(vd, rq);

	if (vd->fifo_expire[dir]) {
		rq->fifo_time = jiffies + vd->fifo_expire[dir];
		list_add_tail(&rq->queuelist, &vd->fifo_list[dir]);
	}
}
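/*
 * Bookkeeping note: every queued request is tracked in two structures at
 * once -- the sector-sorted rbtree (sort_list), from which the cached
 * next_rq/prev_rq pointers are derived, and the per-direction FIFO stamped
 * with fifo_time, which vr_check_fifo() uses to enforce the soft deadlines.
 * If fifo_expire for a direction is 0, requests in that direction are never
 * placed on the FIFO and are served purely by head position.
 */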
/*
 * remove rq from rbtree and fifo.
 */
static void
vr_remove_request(struct request_queue *q, struct request *rq)
{
	struct vr_data *vd = vr_get_data(q);

	rq_fifo_clear(rq);
	vr_del_rq_rb(vd, rq);
}

static int
vr_merge(struct request_queue *q, struct request **rqp, struct bio *bio)
{
	sector_t sector = bio->bi_iter.bi_sector + bio_sectors(bio);
	struct vr_data *vd = vr_get_data(q);
	struct request *rq = elv_rb_find(&vd->sort_list, sector);

	if (rq && elv_rq_merge_ok(rq, bio)) {
		*rqp = rq;
		return ELEVATOR_FRONT_MERGE;
	}
	return ELEVATOR_NO_MERGE;
}

static void
vr_merged_request(struct request_queue *q, struct request *req, int type)
{
	struct vr_data *vd = vr_get_data(q);

	/*
	 * if the merge was a front merge, we need to reposition request
	 */
	if (type == ELEVATOR_FRONT_MERGE) {
		vr_del_rq_rb(vd, req);
		vr_add_rq_rb(vd, req);
	}
}

static void
vr_merged_requests(struct request_queue *q, struct request *rq,
		   struct request *next)
{
	/*
	 * if next expires before rq, assign its expire time to rq
	 * and move into next position (next will be deleted) in fifo
	 */
	if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) {
		if (time_before(next->fifo_time, rq->fifo_time)) {
			list_move(&rq->queuelist, &next->queuelist);
			rq->fifo_time = next->fifo_time;
		}
	}

	vr_remove_request(q, next);
}

/*
 * move an entry to dispatch queue
 */
static void
vr_move_request(struct vr_data *vd, struct request *rq)
{
	struct request_queue *q = rq->q;

	if (blk_rq_pos(rq) > vd->last_sector)
		vd->head_dir = FORWARD;
	else
		vd->head_dir = BACKWARD;

	vd->last_sector = blk_rq_pos(rq);
	vd->next_rq = elv_rb_latter_request(NULL, rq);
	vd->prev_rq = elv_rb_former_request(NULL, rq);

	BUG_ON(vd->next_rq && vd->next_rq == vd->prev_rq);

	vr_remove_request(q, rq);
	elv_dispatch_add_tail(q, rq);
	vd->nbatched++;
}

/*
 * get the first expired request in direction ddir
 */
static struct request *
vr_expired_request(struct vr_data *vd, int ddir)
{
	struct request *rq;

	if (list_empty(&vd->fifo_list[ddir]))
		return NULL;

	rq = rq_entry_fifo(vd->fifo_list[ddir].next);
	if (time_after(jiffies, rq->fifo_time))
		return rq;

	return NULL;
}

/*
 * Returns the oldest expired request
 */
static struct request *
vr_check_fifo(struct vr_data *vd)
{
	struct request *rq_sync = vr_expired_request(vd, SYNC);
	struct request *rq_async = vr_expired_request(vd, ASYNC);

	if (rq_async && rq_sync) {
		if (time_after(rq_async->fifo_time, rq_sync->fifo_time))
			return rq_sync;
	} else if (rq_sync)
		return rq_sync;

	return rq_async;
}

/*
 * Return the request with the lowest penalty
 */
static struct request *
vr_choose_request(struct vr_data *vd)
{
	int penalty = (vd->rev_penalty) ? : INT_MAX;
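	/*
	 * GCC's "?:" with the middle operand omitted evaluates to the
	 * condition itself, so penalty is rev_penalty when non-zero and
	 * INT_MAX when rev_penalty is 0.  The distance in the current head
	 * direction is divided by penalty below, so INT_MAX makes reversing
	 * essentially never worthwhile (pure SCAN), matching the header
	 * comment.
	 *
	 * Worked example (hypothetical numbers): head at sector 1000 moving
	 * FORWARD, next at 1100, prev at 950, rev_penalty = 2:
	 * next_pen = 100 / 2 = 50 and prev_pen = 50, so next is chosen.
	 */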
	struct request *next = vd->next_rq;
	struct request *prev = vd->prev_rq;
	sector_t next_pen, prev_pen;

	BUG_ON(prev && prev == next);

	if (!prev)
		return next;
	else if (!next)
		return prev;

	/* At this point both prev and next are defined and distinct */

	next_pen = blk_rq_pos(next) - vd->last_sector;
	prev_pen = vd->last_sector - blk_rq_pos(prev);

	/*
	 * do_div() divides in place and returns the remainder, so the
	 * quotient (the penalised distance) is left in next_pen/prev_pen.
	 */
	if (vd->head_dir == FORWARD)
		do_div(next_pen, penalty);
	else
		do_div(prev_pen, penalty);

	if (next_pen <= prev_pen)
		return next;

	return prev;
}

static int
vr_dispatch_requests(struct request_queue *q, int force)
{
	struct vr_data *vd = vr_get_data(q);
	struct request *rq = NULL;

	/* Check for and issue expired requests */
	if (vd->nbatched > vd->fifo_batch) {
		vd->nbatched = 0;
		rq = vr_check_fifo(vd);
	}

	if (!rq) {
		rq = vr_choose_request(vd);
		if (!rq)
			return 0;
	}

	vr_move_request(vd, rq);

	return 1;
}

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,38)
static int
vr_queue_empty(struct request_queue *q)
{
	struct vr_data *vd = vr_get_data(q);

	return RB_EMPTY_ROOT(&vd->sort_list);
}
#endif

static void
vr_exit_queue(struct elevator_queue *e)
{
	struct vr_data *vd = e->elevator_data;

	BUG_ON(!RB_EMPTY_ROOT(&vd->sort_list));
	kfree(vd);
}

/*
 * initialize elevator private data (vr_data).
 */
static int vr_init_queue(struct request_queue *q, struct elevator_type *e)
{
	struct vr_data *vd;
	struct elevator_queue *eq;

	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;

	vd = kmalloc_node(sizeof(*vd), GFP_KERNEL | __GFP_ZERO, q->node);
	if (!vd) {
		kobject_put(&eq->kobj);
		return -ENOMEM;
	}
	eq->elevator_data = vd;

	INIT_LIST_HEAD(&vd->fifo_list[SYNC]);
	INIT_LIST_HEAD(&vd->fifo_list[ASYNC]);
	vd->sort_list = RB_ROOT;
	vd->fifo_expire[SYNC] = sync_expire;
	vd->fifo_expire[ASYNC] = async_expire;
	vd->fifo_batch = fifo_batch;
	vd->rev_penalty = rev_penalty;

	spin_lock_irq(q->queue_lock);
	q->elevator = eq;
	spin_unlock_irq(q->queue_lock);

	return 0;
}

/*
 * sysfs parts below
 */
static ssize_t
vr_var_show(int var, char *page)
{
	return sprintf(page, "%d\n", var);
}

static ssize_t
vr_var_store(int *var, const char *page, size_t count)
{
	*var = simple_strtol(page, NULL, 10);
	return count;
}

#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
{									\
	struct vr_data *vd = e->elevator_data;				\
	int __data = __VAR;						\
	if (__CONV)							\
		__data = jiffies_to_msecs(__data);			\
	return vr_var_show(__data, (page));				\
}
SHOW_FUNCTION(vr_sync_expire_show, vd->fifo_expire[SYNC], 1);
SHOW_FUNCTION(vr_async_expire_show, vd->fifo_expire[ASYNC], 1);
SHOW_FUNCTION(vr_fifo_batch_show, vd->fifo_batch, 0);
SHOW_FUNCTION(vr_rev_penalty_show, vd->rev_penalty, 0);
#undef SHOW_FUNCTION

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{									\
	struct vr_data *vd = e->elevator_data;				\
	int __data;							\
	int ret = vr_var_store(&__data, (page), count);			\
	if (__data < (MIN))						\
		__data = (MIN);						\
	else if (__data > (MAX))					\
		__data = (MAX);						\
	if (__CONV)							\
		*(__PTR) = msecs_to_jiffies(__data);			\
	else								\
		*(__PTR) = __data;					\
	return ret;							\
}
STORE_FUNCTION(vr_sync_expire_store, &vd->fifo_expire[SYNC], 0, INT_MAX, 1);
STORE_FUNCTION(vr_async_expire_store, &vd->fifo_expire[ASYNC], 0, INT_MAX, 1);
STORE_FUNCTION(vr_fifo_batch_store, &vd->fifo_batch, 0, INT_MAX, 0);
STORE_FUNCTION(vr_rev_penalty_store, &vd->rev_penalty, 0, INT_MAX, 0);
#undef STORE_FUNCTION

#define DD_ATTR(name) \
	__ATTR(name, S_IRUGO|S_IWUSR, vr_##name##_show, \
				      vr_##name##_store)
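/*
 * The attributes below appear under /sys/block/<dev>/queue/iosched/ while
 * this scheduler is active, e.g. (assuming a disk named sdb):
 *
 *	echo 2 > /sys/block/sdb/queue/iosched/rev_penalty
 *	cat /sys/block/sdb/queue/iosched/sync_expire
 *
 * sync_expire and async_expire are exposed in milliseconds and converted
 * to jiffies internally (the __CONV flag above); fifo_batch and
 * rev_penalty are plain integers.
 */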
static struct elv_fs_entry vr_attrs[] = {
	DD_ATTR(sync_expire),
	DD_ATTR(async_expire),
	DD_ATTR(fifo_batch),
	DD_ATTR(rev_penalty),
	__ATTR_NULL
};

static struct elevator_type iosched_vr = {
	.ops = {
		.elevator_merge_fn = vr_merge,
		.elevator_merged_fn = vr_merged_request,
		.elevator_merge_req_fn = vr_merged_requests,
		.elevator_dispatch_fn = vr_dispatch_requests,
		.elevator_add_req_fn = vr_add_request,
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,38)
		.elevator_queue_empty_fn = vr_queue_empty,
#endif
		.elevator_former_req_fn = elv_rb_former_request,
		.elevator_latter_req_fn = elv_rb_latter_request,
		.elevator_init_fn = vr_init_queue,
		.elevator_exit_fn = vr_exit_queue,
	},
	.elevator_attrs = vr_attrs,
	.elevator_name = "vr",
	.elevator_owner = THIS_MODULE,
};

static int __init vr_init(void)
{
	return elv_register(&iosched_vr);
}

static void __exit vr_exit(void)
{
	elv_unregister(&iosched_vr);
}

module_init(vr_init);
module_exit(vr_exit);

MODULE_AUTHOR("Aaron Carroll");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("V(R) IO scheduler");
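/*
 * Usage sketch: with the module built and loaded, the scheduler is selected
 * per-device at runtime via the legacy elevator interface:
 *
 *	echo vr > /sys/block/<dev>/queue/scheduler
 *
 * When building it into a kernel tree, the usual hookup (the Kconfig symbol
 * and object name below are assumptions, not defined by this file) is a
 * Kconfig.iosched entry plus a block/Makefile line such as:
 *
 *	obj-$(CONFIG_IOSCHED_VR) += vr-iosched.o
 */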