/*
 * V(R) I/O Scheduler
 *
 * Copyright (C) 2007 Aaron Carroll
 *
 *
 * The algorithm:
 *
 * The next request is decided based on its distance from the last
 * request, with a multiplicative penalty of `rev_penalty' applied
 * for reversing the head direction. A rev_penalty of 1 means SSTF
 * behaviour. As this variable is increased, the algorithm approaches
 * pure SCAN. Setting rev_penalty to 0 forces SCAN.
 *
 * Async and sync requests are not treated separately. Instead we
 * rely on deadlines to ensure fairness.
 *
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/elevator.h>
#include <linux/bio.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/compiler.h>
#include <linux/rbtree.h>
#include <linux/list.h>
#include <linux/version.h>

enum vr_data_dir {
	ASYNC,
	SYNC,
};

enum vr_head_dir {
	FORWARD,
	BACKWARD,
};

static const int sync_expire  = 1000;	/* max time before a sync is submitted. */
static const int async_expire = 5000;	/* ditto for async, these limits are SOFT! */
static const int fifo_batch   = 1;
static const int rev_penalty  = 1;	/* penalty for reversing head direction */

struct vr_data {
	struct rb_root sort_list;
	struct list_head fifo_list[2];

	struct request *next_rq;
	struct request *prev_rq;

	unsigned int nbatched;
	sector_t last_sector;		/* head position */
	int head_dir;

	/* tunables */
	int fifo_expire[2];
	int fifo_batch;
	int rev_penalty;
};

static void vr_move_request(struct vr_data *, struct request *);

static inline struct vr_data *
vr_get_data(struct request_queue *q)
{
	return q->elevator->elevator_data;
}

static void
vr_add_rq_rb(struct vr_data *vd, struct request *rq)
{
	elv_rb_add(&vd->sort_list, rq);

	if (blk_rq_pos(rq) >= vd->last_sector) {
		if (!vd->next_rq || blk_rq_pos(vd->next_rq) > blk_rq_pos(rq))
			vd->next_rq = rq;
	} else {
		if (!vd->prev_rq || blk_rq_pos(vd->prev_rq) < blk_rq_pos(rq))
			vd->prev_rq = rq;
	}

	BUG_ON(vd->next_rq && vd->next_rq == vd->prev_rq);
	BUG_ON(vd->next_rq && vd->prev_rq &&
	       blk_rq_pos(vd->next_rq) < blk_rq_pos(vd->prev_rq));
}

static void
vr_del_rq_rb(struct vr_data *vd, struct request *rq)
{
	/*
	 * We might be deleting our cached next request.
	 * If so, find its successor.
	 */
	if (vd->next_rq == rq)
		vd->next_rq = elv_rb_latter_request(NULL, rq);
	else if (vd->prev_rq == rq)
		vd->prev_rq = elv_rb_former_request(NULL, rq);

	BUG_ON(vd->next_rq && vd->next_rq == vd->prev_rq);
	BUG_ON(vd->next_rq && vd->prev_rq &&
	       blk_rq_pos(vd->next_rq) < blk_rq_pos(vd->prev_rq));

	elv_rb_del(&vd->sort_list, rq);
}

/*
 * add rq to rbtree and fifo
 */
static void
vr_add_request(struct request_queue *q, struct request *rq)
{
	struct vr_data *vd = vr_get_data(q);
	const int dir = rq_is_sync(rq);

	vr_add_rq_rb(vd, rq);

	if (vd->fifo_expire[dir]) {
		rq->fifo_time = jiffies + vd->fifo_expire[dir];
		list_add_tail(&rq->queuelist, &vd->fifo_list[dir]);
	}
}
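/*
 * Bookkeeping note: every queued request is tracked in two structures at
 * once -- the sector-sorted rbtree (sort_list), from which the cached
 * next_rq/prev_rq pointers are derived, and the per-direction FIFO stamped
 * with fifo_time, which vr_check_fifo() uses to enforce the soft deadlines.
 * If fifo_expire for a direction is 0, requests in that direction are never
 * placed on the FIFO and are served purely by head position.
 */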
/*
 * remove rq from rbtree and fifo.
 */
static void
vr_remove_request(struct request_queue *q, struct request *rq)
{
	struct vr_data *vd = vr_get_data(q);

	rq_fifo_clear(rq);
	vr_del_rq_rb(vd, rq);
}

static int
vr_merge(struct request_queue *q, struct request **rqp, struct bio *bio)
{
	sector_t sector = bio->bi_iter.bi_sector + bio_sectors(bio);
	struct vr_data *vd = vr_get_data(q);
	struct request *rq = elv_rb_find(&vd->sort_list, sector);

	if (rq && elv_rq_merge_ok(rq, bio)) {
		*rqp = rq;
		return ELEVATOR_FRONT_MERGE;
	}
	return ELEVATOR_NO_MERGE;
}

static void
vr_merged_request(struct request_queue *q, struct request *req, int type)
{
	struct vr_data *vd = vr_get_data(q);

	/*
	 * if the merge was a front merge, we need to reposition request
	 */
	if (type == ELEVATOR_FRONT_MERGE) {
		vr_del_rq_rb(vd, req);
		vr_add_rq_rb(vd, req);
	}
}

static void
vr_merged_requests(struct request_queue *q, struct request *rq,
		   struct request *next)
{
	/*
	 * if next expires before rq, assign its expire time to rq
	 * and move into next position (next will be deleted) in fifo
	 */
	if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) {
		if (time_before(next->fifo_time, rq->fifo_time)) {
			list_move(&rq->queuelist, &next->queuelist);
			rq->fifo_time = next->fifo_time;
		}
	}

	vr_remove_request(q, next);
}

/*
 * move an entry to dispatch queue
 */
static void
vr_move_request(struct vr_data *vd, struct request *rq)
{
	struct request_queue *q = rq->q;

	if (blk_rq_pos(rq) > vd->last_sector)
		vd->head_dir = FORWARD;
	else
		vd->head_dir = BACKWARD;

	vd->last_sector = blk_rq_pos(rq);
	vd->next_rq = elv_rb_latter_request(NULL, rq);
	vd->prev_rq = elv_rb_former_request(NULL, rq);

	BUG_ON(vd->next_rq && vd->next_rq == vd->prev_rq);

	vr_remove_request(q, rq);
	elv_dispatch_add_tail(q, rq);
	vd->nbatched++;
}

/*
 * get the first expired request in direction ddir
 */
static struct request *
vr_expired_request(struct vr_data *vd, int ddir)
{
	struct request *rq;

	if (list_empty(&vd->fifo_list[ddir]))
		return NULL;

	rq = rq_entry_fifo(vd->fifo_list[ddir].next);
	if (time_after(jiffies, rq->fifo_time))
		return rq;

	return NULL;
}

/*
 * Returns the oldest expired request
 */
static struct request *
vr_check_fifo(struct vr_data *vd)
{
	struct request *rq_sync = vr_expired_request(vd, SYNC);
	struct request *rq_async = vr_expired_request(vd, ASYNC);

	if (rq_async && rq_sync) {
		if (time_after(rq_async->fifo_time, rq_sync->fifo_time))
			return rq_sync;
	} else if (rq_sync)
		return rq_sync;

	return rq_async;
}

/*
 * Return the request with the lowest penalty
 */
static struct request *
vr_choose_request(struct vr_data *vd)
{
	int penalty = (vd->rev_penalty) ? : INT_MAX;
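	/*
	 * GCC's "?:" with the middle operand omitted evaluates to the
	 * condition itself, so penalty is rev_penalty when non-zero and
	 * INT_MAX when rev_penalty is 0.  The distance in the current head
	 * direction is divided by penalty below, so INT_MAX makes reversing
	 * essentially never worthwhile (pure SCAN), matching the header
	 * comment.
	 *
	 * Worked example (hypothetical numbers): head at sector 1000 moving
	 * FORWARD, next at 1100, prev at 950, rev_penalty = 2:
	 * next_pen = 100 / 2 = 50 and prev_pen = 50, so next is chosen.
	 */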
	struct request *next = vd->next_rq;
	struct request *prev = vd->prev_rq;
	sector_t next_pen, prev_pen;

	BUG_ON(prev && prev == next);

	if (!prev)
		return next;
	else if (!next)
		return prev;

	/* At this point both prev and next are defined and distinct */

	next_pen = blk_rq_pos(next) - vd->last_sector;
	prev_pen = vd->last_sector - blk_rq_pos(prev);

	/*
	 * do_div() divides in place and returns the remainder, so the
	 * quotient (the penalised distance) is left in next_pen/prev_pen.
	 */
	if (vd->head_dir == FORWARD)
		do_div(next_pen, penalty);
	else
		do_div(prev_pen, penalty);

	if (next_pen <= prev_pen)
		return next;

	return prev;
}

static int
vr_dispatch_requests(struct request_queue *q, int force)
{
	struct vr_data *vd = vr_get_data(q);
	struct request *rq = NULL;

	/* Check for and issue expired requests */
	if (vd->nbatched > vd->fifo_batch) {
		vd->nbatched = 0;
		rq = vr_check_fifo(vd);
	}

	if (!rq) {
		rq = vr_choose_request(vd);
		if (!rq)
			return 0;
	}

	vr_move_request(vd, rq);

	return 1;
}

#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,38)
static int
vr_queue_empty(struct request_queue *q)
{
	struct vr_data *vd = vr_get_data(q);

	return RB_EMPTY_ROOT(&vd->sort_list);
}
#endif

static void
vr_exit_queue(struct elevator_queue *e)
{
	struct vr_data *vd = e->elevator_data;

	BUG_ON(!RB_EMPTY_ROOT(&vd->sort_list));
	kfree(vd);
}

/*
 * initialize elevator private data (vr_data).
 */
static int vr_init_queue(struct request_queue *q, struct elevator_type *e)
{
	struct vr_data *vd;
	struct elevator_queue *eq;

	eq = elevator_alloc(q, e);
	if (!eq)
		return -ENOMEM;

	vd = kmalloc_node(sizeof(*vd), GFP_KERNEL | __GFP_ZERO, q->node);
	if (!vd) {
		kobject_put(&eq->kobj);
		return -ENOMEM;
	}
	eq->elevator_data = vd;

	INIT_LIST_HEAD(&vd->fifo_list[SYNC]);
	INIT_LIST_HEAD(&vd->fifo_list[ASYNC]);
	vd->sort_list = RB_ROOT;
	vd->fifo_expire[SYNC] = sync_expire;
	vd->fifo_expire[ASYNC] = async_expire;
	vd->fifo_batch = fifo_batch;
	vd->rev_penalty = rev_penalty;

	spin_lock_irq(q->queue_lock);
	q->elevator = eq;
	spin_unlock_irq(q->queue_lock);

	return 0;
}

/*
 * sysfs parts below
 */
static ssize_t
vr_var_show(int var, char *page)
{
	return sprintf(page, "%d\n", var);
}

static ssize_t
vr_var_store(int *var, const char *page, size_t count)
{
	*var = simple_strtol(page, NULL, 10);
	return count;
}

#define SHOW_FUNCTION(__FUNC, __VAR, __CONV)				\
static ssize_t __FUNC(struct elevator_queue *e, char *page)		\
{									\
	struct vr_data *vd = e->elevator_data;				\
	int __data = __VAR;						\
	if (__CONV)							\
		__data = jiffies_to_msecs(__data);			\
	return vr_var_show(__data, (page));				\
}
SHOW_FUNCTION(vr_sync_expire_show, vd->fifo_expire[SYNC], 1);
SHOW_FUNCTION(vr_async_expire_show, vd->fifo_expire[ASYNC], 1);
SHOW_FUNCTION(vr_fifo_batch_show, vd->fifo_batch, 0);
SHOW_FUNCTION(vr_rev_penalty_show, vd->rev_penalty, 0);
#undef SHOW_FUNCTION

#define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX, __CONV)			\
static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
{									\
	struct vr_data *vd = e->elevator_data;				\
	int __data;							\
	int ret = vr_var_store(&__data, (page), count);			\
	if (__data < (MIN))						\
		__data = (MIN);						\
	else if (__data > (MAX))					\
		__data = (MAX);						\
	if (__CONV)							\
		*(__PTR) = msecs_to_jiffies(__data);			\
	else								\
		*(__PTR) = __data;					\
	return ret;							\
}
STORE_FUNCTION(vr_sync_expire_store, &vd->fifo_expire[SYNC], 0, INT_MAX, 1);
STORE_FUNCTION(vr_async_expire_store, &vd->fifo_expire[ASYNC], 0, INT_MAX, 1);
STORE_FUNCTION(vr_fifo_batch_store, &vd->fifo_batch, 0, INT_MAX, 0);
STORE_FUNCTION(vr_rev_penalty_store, &vd->rev_penalty, 0, INT_MAX, 0);
#undef STORE_FUNCTION

#define DD_ATTR(name) \
	__ATTR(name, S_IRUGO|S_IWUSR, vr_##name##_show, \
				      vr_##name##_store)
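/*
 * The attributes below appear under /sys/block/<dev>/queue/iosched/ while
 * this scheduler is active, e.g. (assuming a disk named sdb):
 *
 *	echo 2 > /sys/block/sdb/queue/iosched/rev_penalty
 *	cat /sys/block/sdb/queue/iosched/sync_expire
 *
 * sync_expire and async_expire are exposed in milliseconds and converted
 * to jiffies internally (the __CONV flag above); fifo_batch and
 * rev_penalty are plain integers.
 */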
static struct elv_fs_entry vr_attrs[] = {
	DD_ATTR(sync_expire),
	DD_ATTR(async_expire),
	DD_ATTR(fifo_batch),
	DD_ATTR(rev_penalty),
	__ATTR_NULL
};

static struct elevator_type iosched_vr = {
	.ops = {
		.elevator_merge_fn = vr_merge,
		.elevator_merged_fn = vr_merged_request,
		.elevator_merge_req_fn = vr_merged_requests,
		.elevator_dispatch_fn = vr_dispatch_requests,
		.elevator_add_req_fn = vr_add_request,
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,38)
		.elevator_queue_empty_fn = vr_queue_empty,
#endif
		.elevator_former_req_fn = elv_rb_former_request,
		.elevator_latter_req_fn = elv_rb_latter_request,
		.elevator_init_fn = vr_init_queue,
		.elevator_exit_fn = vr_exit_queue,
	},
	.elevator_attrs = vr_attrs,
	.elevator_name = "vr",
	.elevator_owner = THIS_MODULE,
};

static int __init vr_init(void)
{
	return elv_register(&iosched_vr);
}

static void __exit vr_exit(void)
{
	elv_unregister(&iosched_vr);
}

module_init(vr_init);
module_exit(vr_exit);

MODULE_AUTHOR("Aaron Carroll");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("V(R) IO scheduler");
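/*
 * Usage sketch: with the module built and loaded, the scheduler is selected
 * per-device at runtime via the legacy elevator interface:
 *
 *	echo vr > /sys/block/<dev>/queue/scheduler
 *
 * When building it into a kernel tree, the usual hookup (the Kconfig symbol
 * and object name below are assumptions, not defined by this file) is a
 * Kconfig.iosched entry plus a block/Makefile line such as:
 *
 *	obj-$(CONFIG_IOSCHED_VR) += vr-iosched.o
 */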