//quake1 .mdl support
#include "common.h"

#if 1//def USEGL
#include "renderer.h"
#ifdef USEGL
#include "glrenderer.h"
#endif
#ifdef USEVK
#include "vkrenderer.h"
#define GLint int32_t
#define GLuint uint32_t
#define GLsizeiptr ptrdiff_t
#endif
#include "filesystem.h"

//local assert replacement: when the condition is false, the stringified expression is handed to Sys_Error.
//NOTE(review): this expands to a bare 'if' followed by its own ';', so using it as the unbraced body of an if/else will mis-pair the else.
#define assert(x) if (!(x)) Sys_Error("Assertion failed: "#x);

//size of the shader's ent[] array (see vertexprogram), and the upper bound applied to ubo_maxbatchsize.
#define ENTBATCHSIZE 512

#ifdef USEGL

//two-level stringification, so that a macro argument (eg: ENTBATCHSIZE) is expanded to its value before being quoted.
#define STRINGIFY2(x) #x
#define STRINGIFY(x) STRINGIFY2(x)

//version directive that is prepended to both shader sources below.
#define GLSLVER "#version 140\n"	//3.1

//GLSL source for the model vertex shader, assembled from adjacent string literals.
//per vertex it: looks up the entity record selected by v_drawid, fetches the same vertex from two frames
//out of the vert[] storage buffer, blends them by the entity's frame weights, then transforms the result
//by the entity's model matrix followed by the scene's mvp matrix.
//the texture is passed on either as a bindless sampler handle (USEBINDLESS) or as the third component of tc (array layer).
static const char *vertexprogram =
GLSLVER
#ifdef USEBINDLESS
"#extension GL_ARB_bindless_texture : require\n"
#endif
"#extension GL_ARB_shader_storage_buffer_object : require\n"	/*FIXME: replace with gl_ARB_texture_buffer_object (core in gl3.0)*/
//per-scene data. InitModels assigns this block to uniform binding 0.
"layout(std140/*, binding = 0*/) uniform MatrixBlock"
"{"
	"mat4 mvp;"
/*
	"float lmscale[256];"
	"vec3 eyepos;"
	"float time;"
#ifdef USEBINDLESS
	"struct"
	"{"
		"sampler2D d;"
		"sampler2D f;"
	"} materials[256];"
#endif
*/
"};"
//per-entity data. InitModels assigns this block to uniform binding 1. the element layout is mirrored by struct entityuniform_s.
"layout(std140/*, binding = 1*/) uniform EntityBuffer"
"{"
	"struct"
	"{"
		"mat4	model;"
#ifdef USEBINDLESS
		"sampler2D diffuse;"
		"uint	basevert;"
		"float	pad2;"
#else
		"float	texlayer;"
		"uint	basevert;"
		"vec2	pad2;"
#endif
		"uvec2	firstvert;"
		"vec2	frameweight;"
	"} ent["STRINGIFY(ENTBATCHSIZE)"];"
"};"
//vertex positions for every frame of every model, one vec4 per vertex.
"readonly layout(std140/*, binding = 0*/) buffer vertexinfo"
"{"
	"struct"
	"{"
		"vec4 pos;"
	"} vert[];"
"};"
"in vec2 v_texcoord;"
"in uint v_drawid;"
#ifdef USEBINDLESS
"flat out sampler2D tex;"
"out vec2 tc;"
#else
"out vec3 tc;"
#endif
"void main() {"

#if 0	//arb_draw_parameters.
	"uint v_drawid = uint(gl_BaseInstanceARB);"
#endif

#if 1
	//gl_VertexID is rebased by the entity's basevert, then offset into each of the two frames being blended.
	"uint vertid = uint(gl_VertexID) - ent[v_drawid].basevert;"
	"uvec2 fvert = ent[v_drawid].firstvert + vertid;"
	"vec2 fweight = ent[v_drawid].frameweight;"
	"vec3 pos = vert[fvert.x].pos.xyz*fweight.x + vert[fvert.y].pos.xyz*fweight.y;"
#else
	"vec3 pos = v_position;"
#endif

	"gl_Position = mvp * (ent[v_drawid].model * vec4(pos.xyz, 1.0));"

#ifdef USEBINDLESS
	"tex = ent[v_drawid].diffuse;"
	"tc = v_texcoord;"
#else
	"tc = vec3(v_texcoord, ent[v_drawid].texlayer);"
#endif
"}";
//GLSL source for the model fragment shader: a single texture lookup written straight to the output.
//with USEBINDLESS the sampler arrives from the vertex shader as a flat varying and tc is 2d,
//otherwise a sampler2DArray uniform is sampled with a 3-component tc (the third component selects the layer).
static const char *fragmentprogram =
GLSLVER
#ifdef USEBINDLESS
"#extension GL_ARB_bindless_texture : require\n"
"flat in sampler2D tex;"
"in vec2 tc;"
#else
"uniform sampler2DArray tex;"
"in vec3 tc;"
#endif
"out vec4 result;"
"void main() {"
	"result = texture(tex, tc);"
"}";

#endif

//header record of a quake1 .mdl.
//NOTE(review): presumably matches the on-disk layout byte for byte, so fields must not be reordered or resized - confirm against the loader.
struct q1mdl_header_t {
	unsigned int	ident;
	unsigned int	version;

	float			scale[3];
	float			translate[3];
	float			radius;		//unused by us (really should calculate something for culling)
	float			eyepos[3];	//unused by us

	unsigned int	numskins;
	unsigned int	skinwidth;
	unsigned int	skinheight;

	unsigned int	numverts;
	unsigned int	numtris;
	unsigned int	numanims;

	unsigned int	synctype;	//unused by us
	unsigned int	flags;		//unused by us, for now
	float			size;
};
//one texture coordinate record of a .mdl: an integer s/t pair plus a seam flag.
//NOTE(review): s/t look like texel coordinates rather than normalised values - confirm against the loader.
struct qmdl_texcoord_t
{
	unsigned int	onseam;		//treated as a boolean by us
	signed int		s;
	signed int		t;
};
//one vertex of one pose: three byte-sized coordinates and an index identifying its normal.
//NOTE(review): presumably expanded through the header's scale/translate - confirm against the loader.
struct qmdl_vertex_t
{
	unsigned char	v[3];
	unsigned char	normalIndex;
};
//per-pose record: byte-sized bounding box corners and a fixed-size name field.
struct qmdl_pose_t
{
	unsigned char	bboxmin[4];
	unsigned char	bboxmax[4];
	char name[16];
};
//per-group record: how many frames the group holds, plus byte-sized bounding box corners.
struct qmdl_group_t
{
	unsigned int	numframes;
	unsigned char	bboxmin[4];
	unsigned char	bboxmax[4];
};
//one triangle of a .mdl: three vertex indexes plus a front-facing flag.
struct qmdl_triangle_t
{
	unsigned int	facesfront;	//treated as a boolean
	unsigned int	vertex[3];
};







//identifies one uploaded skin image, as returned by mdlbuf::AllocTexture.
//with USEBINDLESS this is a standalone texture and its resident handle, otherwise it is a layer number within the shared texture array.
struct skinsubframe_s {
#ifdef USEBINDLESS
	GLuint		texture;	//the texture(array) to use
	GLuint64	texhandle;	//bindless handle
#else
	float		layer;		//the layer of the texture array.
#endif
};

//cpu-side copy of one element of the shader's ent[] array (see the EntityBuffer block in vertexprogram).
//the members appear in the same order and with the same padding as the glsl struct, so keep the two in step.
//mdlbuf::AddModel hands out pointers to these for the caller to fill in.
struct entityuniform_s {
	float matrix[16];			//'model' in the shader
#ifdef USEBINDLESS
	GLuint64 texture;			//'diffuse' in the shader
	int basevert;
	float pad2;
#else
	float texlayer;
	int basevert;
	float pad2[2];
#endif
	unsigned int framepos[2];	//'firstvert' in the shader: first vertex of each of the two frames being blended
	float frameweight[2];		//blend weight of each of those frames
};

//I actually keep forgetting I can just use a class.
class mdlbuf
{
public:
	unsigned int meshcount;

	//gl caps
	GLint ubo_alignment;			//GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT
	GLint ubo_maxsize;				//GL_MAX_UNIFORM_BLOCK_SIZE
	unsigned int ubo_maxbatchsize;	//max entities that we can fit into a ubo

#ifndef USEBINDLESS
	//skin info. no longer using bindless tw
	unsigned int skinlayers;
	unsigned int skinmaxlayers;
#ifdef USEGL
	GLuint texturearray;
#endif
#endif
	unsigned int skinwidth;	//too lazy to do bindless properly.
	unsigned int skinheight;

	//uniform buffer object
	static const int ub_pages = 4;
	struct
	{
		unsigned char *ptr;
#ifdef USEGL
		GLuint buf;
		GLsync sync;
#endif
#ifdef USEVK
		size_t offset;
		VkFence fence;
#endif
	} ub_page[ub_pages];
	unsigned int ub_curpage;
	unsigned char *ub_ptr;
#ifdef USEGL
	GLuint sceneub_buf;
	GLuint ub_buf;
#endif
	static const int ub_size = 65536 * sizeof(struct entityuniform_s);
	int ub_batch_ofs;
	GLsizeiptr ub_ofs;

	//draw indirect buffer
	//we don't need to care about fences here, as the ubo is guarenteed to be consumed before the dibs
	unsigned char *dib_ptr;
#ifdef USEGL
	GLuint dib_buf;
	static const int dib_size = sizeof(struct indirectdrawcmd_s) * 65536*ub_pages;	//huuuge buffer. too lazy to play safe.
#endif
#ifdef USEVK
	static const int dib_size = sizeof(VkDrawIndexedIndirectCommand) * 65536;
#endif
	GLsizeiptr dib_batch_ofs;
	GLsizeiptr dib_ofs;

	//these shouldn't be that dynamic, so I'm just gonna fill them with glBufferSubData because I'm lazy.
	//FIXME: assign each set of these buffers to their own thing or something.
	static const size_t vbo_size = sizeof(float)*2 * 65536*4;
	GLsizeiptr vbo_ofs;
	GLsizeiptr vbo_texcoord_offset;

	static const size_t ebo_size = sizeof(int)*3 * 65536*16;
	GLsizeiptr ebo_ofs;

	static const size_t ssbo_size = 16 * 1024 * 1024;
	GLsizeiptr ssbo_ofs;

#ifdef USEGL
	GLuint vbo_buf;	//just holds texture coords, but not vertex data
	GLuint ebo_buf;	//holds index info
	GLuint ssbo_buf;	//holds coords info
	GLuint vao;
	GLuint meshshader;
#endif
#ifdef USEVK
	vk_buffer_t dib;
	vk_buffer_t vbo;
	vk_buffer_t ebo;
	vk_buffer_t ssbo;
	vk_buffer_t sceneubo;
	vk_buffer_t entityubo;
	vk_image_t texturearray;
	VkPipeline mdlpipeline;
	VkPipelineLayout mdlpipeLayout;
	VkDescriptorSetLayout mdldescSetLayout[2];
	VkDescriptorSet descriptors[2];
	VkDescriptorPool descPools[2];
#endif

	~mdlbuf()
	{
#ifdef USEVK
		pvkUnmapMemory(vkdevice, entityubo.memory);
		pvkUnmapMemory(vkdevice, dib.memory);

		pvkDestroyPipeline(vkdevice, mdlpipeline, vkallocationcb);
		pvkDestroyDescriptorPool(vkdevice, descPools[0], vkallocationcb);
		pvkDestroyDescriptorPool(vkdevice, descPools[1], vkallocationcb);
		pvkDestroyDescriptorSetLayout(vkdevice, mdldescSetLayout[0], vkallocationcb);
		pvkDestroyDescriptorSetLayout(vkdevice, mdldescSetLayout[1], vkallocationcb);
		pvkDestroyPipelineLayout(vkdevice, mdlpipeLayout, vkallocationcb);

		DestroyBuffer(&dib);
		DestroyBuffer(&vbo);
		DestroyBuffer(&ebo);
		DestroyBuffer(&ssbo);
		DestroyBuffer(&sceneubo);
		DestroyBuffer(&entityubo);
		DestroyTexture(&texturearray);
#endif
	}
	mdlbuf()
	{
		meshcount = 0;
		vbo_texcoord_offset = 0;
#ifdef USEGL
		dib_buf = 0;
		vbo_buf = 0;
		ebo_buf = 0;
		ssbo_buf = 0;
		vao = 0;
		meshshader = 0;
#endif
	}

	void InitModels(void);
	void FlushModels(void);
	struct entityuniform_s *AddModel(int firstvert, int firstidx, int numidx, rstate_t *scene);

	int AllocTexcoords(vec2 *stcoords, unsigned int count)
	{
		int ofs = vbo_ofs;
		vbo_ofs += sizeof(*stcoords)*count;

#ifdef USEGL
		pglBindBuffer(GL_ARRAY_BUFFER, vbo_buf);
		pglBufferSubData(GL_ARRAY_BUFFER, ofs, sizeof(*stcoords)*count, stcoords);
#endif
#ifdef USEVK
		FillBufferSync(&vbo, ofs, sizeof(*stcoords)*count, stcoords);
#endif
		return (ofs-vbo_texcoord_offset) / (sizeof(float)*2);
	}

	int AllocIndexes(unsigned int *indexes, unsigned int count)
	{
		int ofs = ebo_ofs;
		ebo_ofs += sizeof(*indexes)*count;

#ifdef USEGL
		pglBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo_buf);
		pglBufferSubData(GL_ELEMENT_ARRAY_BUFFER, ofs, sizeof(*indexes)*count, indexes);
#endif
#ifdef USEVK
		FillBufferSync(&ebo, ofs, sizeof(*indexes)*count, indexes);
#endif
		return ofs / sizeof(*indexes);
	}

	int AllocVertexes(float *coords4, unsigned int count)
	{
		int ofs = ssbo_ofs;
		ssbo_ofs += sizeof(*coords4)*4*count;

#ifdef USEGL
		pglBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo_buf);
		pglBufferSubData(GL_SHADER_STORAGE_BUFFER, ofs, sizeof(*coords4)*4*count, coords4);
#endif
#ifdef USEVK
		FillBufferSync(&ssbo, ofs, sizeof(*coords4)*4*count, coords4);
#endif
		return ofs / (sizeof(*coords4)*4);	//needs to be first index.
	}


	struct skinsubframe_s AllocTexture(unsigned char *data, unsigned char *palette, unsigned int width, unsigned int height)
	{
		struct skinsubframe_s ret;

		unsigned char *img = new unsigned char[4*width*height];
		for (size_t i = 0; i < width*height; i++)
		{
			img[(i<<2)+0] = palette[data[i]*3+0];
			img[(i<<2)+1] = palette[data[i]*3+1];
			img[(i<<2)+2] = palette[data[i]*3+2];
			img[(i<<2)+3] = 255;
		}

#ifdef USEGL
#ifdef USEBINDLESS
		pglGenTextures(1, &ret.texture);
		pglBindTexture(GL_TEXTURE_2D, ret.texture);
		pglTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
		pglTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
		pglTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
		pglTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
		pglTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, img);
		pglGenerateMipmap(GL_TEXTURE_2D);
		ret.texhandle = pglGetTextureHandleARB(ret.texture);
		pglMakeTextureHandleResidentARB(ret.texhandle);

		skinwidth = width;
		skinheight = height;
#else
		int mip = 0;
		if (width > skinwidth)
			Com_Printf(PRINT_ERROR, "skin width of %i exceeds max of %i\n", width, skinwidth);
		if (height > skinheight)
			Com_Printf(PRINT_ERROR, "skin height of %i exceeds max of %i\n", height, skinheight);
		if (skinlayers >= skinmaxlayers)
		{
			Com_Printf(PRINT_ERROR, "Buffer got too many skins\n");
			skinlayers = 0;
		}
		pglBindTexture(GL_TEXTURE_2D_ARRAY, texturearray);
		pglTexSubImage3D (GL_TEXTURE_2D_ARRAY, mip, 0, 0, skinlayers, width, height, 1, GL_RGBA, GL_UNSIGNED_BYTE, img);
		pglGenerateMipmap(GL_TEXTURE_2D_ARRAY);
		ret.layer = skinlayers;
		skinlayers++;
#endif
#endif
#ifdef USEVK
		int mip = 0;
		if (width > skinwidth)
			Com_Printf(PRINT_ERROR, "skin width of %i exceeds max of %i\n", width, skinwidth);
		if (height > skinheight)
			Com_Printf(PRINT_ERROR, "skin height of %i exceeds max of %i\n", height, skinheight);
		if (skinlayers >= skinmaxlayers)
		{
			Com_Printf(PRINT_ERROR, "Buffer got too many skins\n");
			skinlayers = 0;
		}
		FillTextureSync(texturearray, width, height, skinlayers, 0, img);
		ret.layer = skinlayers;
		skinlayers++;
#endif
		delete[] img;

		return ret;
	}


#ifdef USEVK
	void LoadShader(void)
	{
		{
			VkDescriptorSetLayoutBinding db[3] = {};
			db[0].binding = 0;	//world matrix
			db[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
			db[0].descriptorCount = 1;
			db[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
			db[0].pImmutableSamplers = NULL;

			db[1].binding = 1;	//the texture array
			db[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
			db[1].descriptorCount = 1;
			db[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
			db[1].pImmutableSamplers = NULL;	//FIXME

			db[2].binding = 2;	//vertex positions.
			db[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
			db[2].descriptorCount = 1;
			db[2].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
			db[2].pImmutableSamplers = NULL;

			VkDescriptorSetLayoutCreateInfo descSetLayoutCreateInfo = {};
			descSetLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
			descSetLayoutCreateInfo.bindingCount = countof(db);
			descSetLayoutCreateInfo.pBindings = db;
			VkAssert(pvkCreateDescriptorSetLayout(vkdevice, &descSetLayoutCreateInfo, NULL, &mdldescSetLayout[0]));
		}
		{
			VkDescriptorSetLayoutBinding db[1] = {};
			db[0].binding = 0;	//per-entity information
			db[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
			db[0].descriptorCount = 1;
			db[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
			db[0].pImmutableSamplers = NULL;

			VkDescriptorSetLayoutCreateInfo descSetLayoutCreateInfo = {};
			descSetLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
			descSetLayoutCreateInfo.bindingCount = countof(db);
			descSetLayoutCreateInfo.pBindings = db;
			VkAssert(pvkCreateDescriptorSetLayout(vkdevice, &descSetLayoutCreateInfo, NULL, &mdldescSetLayout[1]));
		}

		VkPipelineLayoutCreateInfo pipeLayoutCreateInfo = {};
		pipeLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
		pipeLayoutCreateInfo.flags = 0;
		pipeLayoutCreateInfo.setLayoutCount = countof(mdldescSetLayout);
		pipeLayoutCreateInfo.pSetLayouts = mdldescSetLayout;
//		pipeLayoutCreateInfo.pushConstantRangeCount = 0;
//		pipeLayoutCreateInfo.pPushConstantRanges = NULL;
		VkAssert(pvkCreatePipelineLayout(vkdevice, &pipeLayoutCreateInfo, NULL, &mdlpipeLayout));

		VkPipelineShaderStageCreateInfo shaderStages[2] = {};
		shaderStages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
		shaderStages[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
		shaderStages[0].module = LoadShaderCode("model_vert.spv", spv_model_vert);
		shaderStages[0].pName = "main";
		shaderStages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
		shaderStages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
		shaderStages[1].module = LoadShaderCode("model_frag.spv", spv_model_frag);
		shaderStages[1].pName = "main";

		VkDynamicState dynamicStateEnables[VK_DYNAMIC_STATE_RANGE_SIZE]={};
		VkPipelineDynamicStateCreateInfo dyn = {};
		dyn.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
		dyn.flags = 0;
		dyn.dynamicStateCount = 0;
		dyn.pDynamicStates = dynamicStateEnables;

		VkVertexInputBindingDescription vbinds[1] = {};
		vbinds[0].binding = 0;
		vbinds[0].stride = sizeof(float)*2;	//just texcoords.
		vbinds[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
		VkVertexInputAttributeDescription vattrs[1] = {};
		vattrs[0].binding = vbinds[0].binding;
		vattrs[0].location = 0;
		vattrs[0].format = VK_FORMAT_R32G32_SFLOAT;
		vattrs[0].offset = 0;
		VkPipelineVertexInputStateCreateInfo vi = {};
		vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
		vi.vertexBindingDescriptionCount = countof(vbinds);
		vi.pVertexBindingDescriptions = vbinds;
		vi.vertexAttributeDescriptionCount = countof(vattrs);
		vi.pVertexAttributeDescriptions = vattrs;

		VkPipelineInputAssemblyStateCreateInfo ia = {};
		ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
		ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
		VkPipelineViewportStateCreateInfo vp = {};
		vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
		vp.viewportCount = 1;
		dynamicStateEnables[dyn.dynamicStateCount++] =	VK_DYNAMIC_STATE_VIEWPORT;
		vp.scissorCount = 1;
		dynamicStateEnables[dyn.dynamicStateCount++] =	VK_DYNAMIC_STATE_SCISSOR;
		VkPipelineRasterizationStateCreateInfo rs = {};
		rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
		rs.polygonMode = VK_POLYGON_MODE_FILL;
		rs.cullMode = VK_CULL_MODE_BACK_BIT;
		rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
		rs.depthClampEnable = VK_FALSE;
		rs.rasterizerDiscardEnable = VK_FALSE;
		rs.depthBiasEnable = VK_FALSE;
		VkPipelineMultisampleStateCreateInfo ms = {};
		ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
		ms.pSampleMask = NULL;
		ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
		VkPipelineDepthStencilStateCreateInfo ds = {};
		ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
		ds.depthTestEnable = VK_TRUE;
		ds.depthWriteEnable = VK_TRUE;
		ds.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL;
		ds.depthBoundsTestEnable = VK_FALSE;
		ds.back.failOp = VK_STENCIL_OP_KEEP;
		ds.back.passOp = VK_STENCIL_OP_KEEP;
		ds.back.compareOp = VK_COMPARE_OP_ALWAYS;
		ds.stencilTestEnable = VK_FALSE;
		ds.front = ds.back;
		VkPipelineColorBlendStateCreateInfo cb = {};
		cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
		VkPipelineColorBlendAttachmentState att_state[1];
		memset(att_state, 0, sizeof(att_state));
		att_state[0].colorWriteMask = 0xf;
		att_state[0].blendEnable = VK_FALSE;
		cb.attachmentCount = 1;
		cb.pAttachments = att_state;


		VkGraphicsPipelineCreateInfo pipeCreateInfo = {};
		pipeCreateInfo.sType				= VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
		pipeCreateInfo.flags				= 0;
		pipeCreateInfo.stageCount			= countof(shaderStages);
		pipeCreateInfo.pStages				= shaderStages;
		pipeCreateInfo.pVertexInputState	= &vi;
		pipeCreateInfo.pInputAssemblyState	= &ia;
		pipeCreateInfo.pTessellationState	= NULL;	//null is okay!
		pipeCreateInfo.pViewportState		= &vp;
		pipeCreateInfo.pRasterizationState	= &rs;
		pipeCreateInfo.pMultisampleState	= &ms;
		pipeCreateInfo.pDepthStencilState	= &ds;
		pipeCreateInfo.pColorBlendState		= &cb;
		pipeCreateInfo.pDynamicState		= &dyn;
		pipeCreateInfo.layout				= mdlpipeLayout;
		pipeCreateInfo.renderPass			= vkrenderpass;
		pipeCreateInfo.subpass				= 0;
		pipeCreateInfo.basePipelineHandle	= VK_NULL_HANDLE;
		pipeCreateInfo.basePipelineIndex	= 0;

		VkAssert(pvkCreateGraphicsPipelines(vkdevice, VK_NULL_HANDLE, 1, &pipeCreateInfo, NULL, &mdlpipeline));

		pvkDestroyShaderModule(vkdevice, shaderStages[0].module, vkallocationcb);
		pvkDestroyShaderModule(vkdevice, shaderStages[1].module, vkallocationcb);
	}
	void LoadDescriptors(void)
	{
		{
			VkDescriptorPoolSize dpisz[3] = {};
			dpisz[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
			dpisz[0].descriptorCount = 1;
			dpisz[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
			dpisz[1].descriptorCount = 1;
			dpisz[2].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
			dpisz[2].descriptorCount = 1;
			VkDescriptorPoolCreateInfo dpi = {};	// the creation info states how many descriptor sets are in this pool
			dpi.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
			dpi.flags = 0;
			dpi.maxSets = 1;
			dpi.poolSizeCount = countof(dpisz);
			dpi.pPoolSizes = dpisz;

			VkAssert(pvkCreateDescriptorPool(vkdevice, &dpi, NULL, &descPools[0]));

			VkDescriptorSetAllocateInfo descAllocInfo = {};	// from pool descPool, with layout descSetLayout
			descAllocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
			descAllocInfo.descriptorPool = descPools[0];
			descAllocInfo.descriptorSetCount = 1;
			descAllocInfo.pSetLayouts = &mdldescSetLayout[0];

			VkAssert(pvkAllocateDescriptorSets(vkdevice, &descAllocInfo, &descriptors[0]));
		}

		{
			VkDescriptorPoolSize dpisz[1] = {};
			dpisz[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
			dpisz[0].descriptorCount = 1;
			VkDescriptorPoolCreateInfo dpi = {};	// the creation info states how many descriptor sets are in this pool
			dpi.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
			dpi.flags = 0;
			dpi.maxSets = 1;
			dpi.poolSizeCount = countof(dpisz);
			dpi.pPoolSizes = dpisz;

			VkAssert(pvkCreateDescriptorPool(vkdevice, &dpi, NULL, &descPools[1]));

			VkDescriptorSetAllocateInfo descAllocInfo = {};	// from pool descPool, with layout descSetLayout
			descAllocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
			descAllocInfo.descriptorPool = descPools[1];
			descAllocInfo.descriptorSetCount = 1;
			descAllocInfo.pSetLayouts = &mdldescSetLayout[1];

			VkAssert(pvkAllocateDescriptorSets(vkdevice, &descAllocInfo, &descriptors[1]));
		}

		VkDescriptorImageInfo imginfo = {};
		imginfo.sampler = texturearray.sampler;
		imginfo.imageView = texturearray.imageView;
		imginfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;

		VkDescriptorBufferInfo suboinfo = {};
		suboinfo.buffer = sceneubo.buffer;
		suboinfo.offset = 0;
		suboinfo.range = sizeof(matrix4x4);

		VkDescriptorBufferInfo ssboinfo = {};
		ssboinfo.buffer = ssbo.buffer;
		ssboinfo.offset = 0;
		ssboinfo.range = ssbo_size;

		VkDescriptorBufferInfo euboinfo = {};
		euboinfo.buffer = entityubo.buffer;
		euboinfo.offset = 0;
		euboinfo.range = sizeof(struct entityuniform_s) * ENTBATCHSIZE;

		VkWriteDescriptorSet descs[3] = {};	// write the details of our UBO buffer into binding 0
		descs[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
		descs[0].dstSet = descriptors[0];
		descs[0].dstBinding = 0;
		descs[0].dstArrayElement = 0;
		descs[0].descriptorCount = 1;
		descs[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
		descs[0].pImageInfo = NULL;
		descs[0].pBufferInfo = &suboinfo;
		descs[0].pTexelBufferView = NULL;

		descs[1].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
		descs[1].dstSet = descriptors[0];
		descs[1].dstBinding = 1;
		descs[1].dstArrayElement = 0;
		descs[1].descriptorCount = 1;
		descs[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
		descs[1].pImageInfo = &imginfo;
		descs[1].pBufferInfo = NULL;
		descs[1].pTexelBufferView = NULL;

		descs[2].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
		descs[2].dstSet = descriptors[0];
		descs[2].dstBinding = 2;
		descs[2].dstArrayElement = 0;
		descs[2].descriptorCount = 1;
		descs[2].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
		descs[2].pImageInfo = NULL;
		descs[2].pBufferInfo = &ssboinfo;
		descs[2].pTexelBufferView = NULL;

		pvkUpdateDescriptorSets(vkdevice, countof(descs), descs, 0, NULL);

		//NVIDIA SUCK BIG HAIRY DONKEY BALLS
		descs[0].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
		descs[0].dstSet = descriptors[1];
		descs[0].dstBinding = 0;
		descs[0].dstArrayElement = 0;
		descs[0].descriptorCount = 1;
		descs[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
		descs[0].pImageInfo = NULL;
		descs[0].pBufferInfo = &euboinfo;
		descs[0].pTexelBufferView = NULL;
		pvkUpdateDescriptorSets(vkdevice, 1, descs, 0, NULL);
	}
#endif
};
//creates every gpu resource the batch needs and resets all of the allocation offsets.
//vulkan: pipeline, skin texture array, static+dynamic buffers (the dynamic ones are left mapped), descriptors.
//gl: skin texture array (unless bindless), vao, shader, persistently mapped indirect+uniform buffers,
//    the static vbo/ebo/ssbo, and the two vertex attributes (per-instance draw id, per-vertex texcoord).
void mdlbuf::InitModels(void)
{
#ifdef USEVK
	LoadShader();

	skinwidth = 512;
	skinheight = 256;
	skinmaxlayers = 128;
	skinlayers = 0;	//allocated count.
	texturearray = CreateTexture2DArray(skinwidth, skinheight, skinmaxlayers, 1);
	CreateTextureSampler(&texturearray, true, true);

	//static buffers
	vbo = CreateBuffer(vbo_size, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, false);
	ebo = CreateBuffer(ebo_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, false);
	ssbo = CreateBuffer(ssbo_size, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, false);

	//dynamic buffers. gah!
	sceneubo = CreateBuffer(sizeof(matrix4x4), VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, true);
	entityubo = CreateBuffer(ub_size*ub_pages, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, true);
	dib = CreateBuffer(dib_size, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT, true);

	ubo_alignment = (uint32_t)entityubo.alignment-1;

	//all pages live in the one buffer, back to back. a failed map would leave us writing through a junk pointer, so check it.
//	ub_page[0].ptr = (unsigned char *)malloc(ub_size*ub_pages);
	VkAssert(pvkMapMemory(vkdevice, entityubo.memory, 0, ub_size*ub_pages, 0, (void**)&ub_page[0].ptr));
	for (unsigned int i = 1; i < ub_pages; i++)
	{
		ub_page[i].ptr = ub_page[i-1].ptr + ub_size;
	}

//	dib_ptr = (unsigned char *)malloc(dib_size);
	VkAssert(pvkMapMemory(vkdevice, dib.memory, 0, dib_size, 0, (void**)&dib_ptr));

	LoadDescriptors();



	dib_batch_ofs = dib_ofs = 0;
	ub_batch_ofs = ub_ofs = 0;
	ub_curpage = 0;
	ub_ptr = ub_page[ub_curpage].ptr;
	vbo_ofs = 0;
	ebo_ofs = 0;
	ssbo_ofs = 0;

	//nothing on this path queries a device limit, so ubo_maxsize would otherwise be read without ever having been written.
	//the entity descriptor range is hardcoded to a full ENTBATCHSIZE batch (see LoadDescriptors), so size it to match.
	//FIXME: should really come from the device's uniform buffer range limit.
	ubo_maxsize = (GLint)(sizeof(struct entityuniform_s) * ENTBATCHSIZE);

	ubo_maxbatchsize = ubo_maxsize / sizeof(struct entityuniform_s);	//figure out how many ents can fit.
	if (ubo_maxbatchsize > ENTBATCHSIZE)	//compensate for our glsl using a hardcoded limit.
		ubo_maxbatchsize = ENTBATCHSIZE;
#endif

#ifdef USEGL
	GLbitfield mapflags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
	GLbitfield createflags = mapflags | GL_DYNAMIC_STORAGE_BIT;
	int atr;

	pglGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &ubo_alignment);	//ubos have a base alignment if you're using ranges.
	ubo_alignment -= 1;	//kept as a mask
	pglGetIntegerv(GL_MAX_UNIFORM_BLOCK_SIZE,			&ubo_maxsize);	//maximum size of a single ubo range. a shader limitation rather than a buffer one.

#ifndef USEBINDLESS
	skinwidth = 512;
	skinheight = 256;
	skinmaxlayers = 128;
	skinlayers = 0;	//allocated count.
	pglGenTextures(1, &texturearray);
	pglBindTexture(GL_TEXTURE_2D_ARRAY, texturearray);
	//NOTE(review): without pglTexStorage3D the array never gets any storage.
	if (pglTexStorage3D)	//fixme: mipmaps. but I guess sw never had any
		pglTexStorage3D(GL_TEXTURE_2D_ARRAY, 1, GL_RGBA8, skinwidth, skinheight, skinmaxlayers);
	pglTexParameteri(GL_TEXTURE_2D_ARRAY,GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
	pglTexParameteri(GL_TEXTURE_2D_ARRAY,GL_TEXTURE_MAG_FILTER, GL_LINEAR);
	pglTexParameteri(GL_TEXTURE_2D_ARRAY,GL_TEXTURE_WRAP_S,GL_CLAMP_TO_EDGE);
	pglTexParameteri(GL_TEXTURE_2D_ARRAY,GL_TEXTURE_WRAP_T,GL_CLAMP_TO_EDGE);
#endif


	if (!vao)
		pglGenVertexArrays(1, &vao);
	pglBindVertexArray(vao);
	if (!meshshader)
	{
		meshshader = GL_CompileShader(vertexprogram, fragmentprogram);

		//the shader doesn't specify its own bindings, so assign them here.
		//the index is unsigned, so compare against the all-ones 'not found' value explicitly.
		GLuint ub = pglGetUniformBlockIndex(meshshader,  "MatrixBlock");
		if (ub != (GLuint)-1)
			pglUniformBlockBinding(meshshader, ub, 0);
		ub = pglGetUniformBlockIndex(meshshader,  "EntityBuffer");
		if (ub != (GLuint)-1)
			pglUniformBlockBinding(meshshader, ub, 1);
//		ub = pglGetUniformBlockIndex(meshshader,  "vertexinfo");
//		if (ub != -1)
//			pglUniformBlockBinding(meshshader, ub, 0);
	}

	//indirect draw commands: one big persistently mapped buffer.
	dib_batch_ofs = dib_ofs = 0;
	pglGenBuffers(1, &dib_buf);
	pglBindBuffer(GL_DRAW_INDIRECT_BUFFER, dib_buf);
	pglBufferStorage(GL_DRAW_INDIRECT_BUFFER, dib_size, NULL, createflags);
	dib_ptr = (unsigned char *)pglMapBufferRange(GL_DRAW_INDIRECT_BUFFER, 0, dib_size, mapflags);

	//entity uniforms: a ring of persistently mapped buffers, each with its own fence.
	ub_batch_ofs = ub_ofs = 0;
	for (unsigned int i = 0; i < ub_pages; i++)
	{
		pglGenBuffers(1, &ub_page[i].buf);
		pglBindBuffer(GL_UNIFORM_BUFFER, ub_page[i].buf);
		pglBufferStorage(GL_UNIFORM_BUFFER, ub_size, NULL, createflags);
		ub_page[i].ptr = (unsigned char*)pglMapBufferRange(GL_UNIFORM_BUFFER, 0, ub_size, mapflags);
		ub_page[i].sync = pglFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
	}
	ub_curpage = 0;
	ub_ptr = ub_page[ub_curpage].ptr;
	ub_buf = ub_page[ub_curpage].buf;
	
	ubo_maxbatchsize = ubo_maxsize / sizeof(struct entityuniform_s);	//figure out how many ents can fit.
	if (ubo_maxbatchsize > ENTBATCHSIZE)	//compensate for our glsl using a hardcoded limit.
		ubo_maxbatchsize = ENTBATCHSIZE;

	vbo_ofs = 0;
	pglGenBuffers(1, &vbo_buf);
	pglBindBuffer(GL_ARRAY_BUFFER, vbo_buf);
	pglBufferData(GL_ARRAY_BUFFER, vbo_size, NULL, GL_DYNAMIC_DRAW);

	ebo_ofs = 0;
	pglGenBuffers(1, &ebo_buf);
	pglBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo_buf);
	pglBufferData(GL_ELEMENT_ARRAY_BUFFER, ebo_size, NULL, GL_DYNAMIC_DRAW);

	ssbo_ofs = 0;
	pglGenBuffers(1, &ssbo_buf);
	pglBindBuffer(GL_SHADER_STORAGE_BUFFER, ssbo_buf);
	pglBufferData(GL_SHADER_STORAGE_BUFFER, ssbo_size, NULL, GL_DYNAMIC_DRAW);

//	pglBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo_buf);


	//instanced attribute
	//a table of 0,1,2... at the start of the vbo, stepped once per instance,
	//so that each draw's baseInstance (set by AddModel) reaches the shader as v_drawid.
	atr = pglGetAttribLocation(meshshader, "v_drawid");
	if (atr != -1)
	{
		pglBindBuffer(GL_ARRAY_BUFFER, vbo_buf);
		unsigned int *tmpptr = (unsigned int *)pglMapBufferRange(GL_ARRAY_BUFFER, vbo_ofs, sizeof(unsigned int)*ubo_maxbatchsize, GL_MAP_WRITE_BIT);
		if (tmpptr)	//mapping can fail, in which case there's nothing to write through (or unmap).
		{
			for (unsigned int i = 0; i < ubo_maxbatchsize; i++)
				tmpptr[i] = i;
			pglUnmapBuffer(GL_ARRAY_BUFFER);	//technically this is allowed to fail.
		}
		pglVertexAttribIPointer(atr, 1, GL_UNSIGNED_INT, sizeof(unsigned int), (void*)vbo_ofs);
		vbo_ofs += sizeof(unsigned int)*ubo_maxbatchsize;
		pglEnableVertexAttribArray(atr);
		pglVertexAttribDivisor(atr, 1);
	}

	//texcoords attribute
	//everything after the draw id table is texcoords; AllocTexcoords returns indexes relative to this point.
	atr = pglGetAttribLocation(meshshader, "v_texcoord");
	if (atr != -1)
	{
		vbo_texcoord_offset = vbo_ofs;
		pglBindBuffer(GL_ARRAY_BUFFER, vbo_buf);
		pglVertexAttribPointer(atr, 2, GL_FLOAT, FALSE, sizeof(float)*2, (void*)vbo_ofs);
		pglEnableVertexAttribArray(atr);
	}
#endif
}

//file-wide mdlbuf instance used by the GL_* entry points below. may be NULL.
static class mdlbuf *modelbuffer;

//submits whatever the file-wide batch currently has queued. a no-op when there is no batch.
void GL_FlushModels(void)
{
	if (modelbuffer)
		modelbuffer->FlushModels();
}
void GL_PurgeModels(void)
{
	if (!modelbuffer)
		return;
	delete modelbuffer;
}

//submits every draw queued by AddModel since the last flush as one indirect multi-draw, then starts a new batch.
//the new batch begins at the next suitably aligned offset within the current uniform page,
//or at the start of the next page if a full-sized batch would no longer fit in this one.
void mdlbuf::FlushModels(void)
{
	if (!meshcount)
		return;

	//glBindBufferRange doesn't appear to like truncated / dynamically sized uniform arrays.
	//(a store padding the batch out to ubo_maxbatchsize entries used to sit here, but it was always overwritten by this one.)
	ub_ofs = ub_batch_ofs + sizeof(struct entityuniform_s)*meshcount;

#ifdef USEGL
	pglBindVertexArray(vao);
#ifndef USEBINDLESS
	pglBindTexture(GL_TEXTURE_2D_ARRAY, texturearray);
#endif
	pglUseProgram(meshshader);
	pglBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo_buf);
	pglBindBufferBase(GL_UNIFORM_BUFFER, 0, sceneub_buf);
	pglBindBufferRange(GL_UNIFORM_BUFFER, 1, ub_buf, ub_batch_ofs, ub_ofs - ub_batch_ofs);
	pglBindBuffer(GL_DRAW_INDIRECT_BUFFER, dib_buf);
//	pglBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo_buf);	//supposed to be part of the vao
	pglMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, (void*)dib_batch_ofs, meshcount, 0);

//	pglUseProgram(0);
//	pglBindVertexArray(0);
#endif
#ifdef USEVK

	pvkCmdBindPipeline(vkmaincmd, VK_PIPELINE_BIND_POINT_GRAPHICS, mdlpipeline);
	//the dynamic offset is relative to the start of the whole entity buffer (page 0), not to the current page.
	//the explicit cast is needed because a pointer difference may not be narrowed implicitly inside braces.
	uint32_t dynoffsets[1] = {(uint32_t)(ub_ptr+ub_batch_ofs - ub_page[0].ptr)};
	pvkCmdBindDescriptorSets(vkmaincmd, VK_PIPELINE_BIND_POINT_GRAPHICS, mdlpipeLayout, /*first*/0, 1, &descriptors[0], 0, NULL);
	pvkCmdBindDescriptorSets(vkmaincmd, VK_PIPELINE_BIND_POINT_GRAPHICS, mdlpipeLayout, /*first*/1, 1, &descriptors[1], 1, dynoffsets);
	pvkCmdBindIndexBuffer(vkmaincmd, ebo.buffer, 0, VK_INDEX_TYPE_UINT32);
	const VkDeviceSize offsets[1] = {0};
	pvkCmdBindVertexBuffers(vkmaincmd, 0, 1, &vbo.buffer, offsets);

//	FillBufferSync(&entityubo, ub_ptr+ub_batch_ofs - ub_page[0].ptr, sizeof(struct entityuniform_s)*meshcount, ub_ptr+ub_batch_ofs);
//	VkDrawIndexedIndirectCommand *cmd = (VkDrawIndexedIndirectCommand*)(dib_ptr+dib_batch_ofs);
//	for (uint32_t i = 0; i < meshcount; i++, cmd++)
//		pvkCmdDrawIndexed(vkmaincmd, cmd->indexCount, cmd->instanceCount, cmd->firstIndex, cmd->vertexOffset, cmd->firstInstance);
	pvkCmdDrawIndexedIndirect(vkmaincmd, dib.buffer, dib_batch_ofs, meshcount, sizeof(VkDrawIndexedIndirectCommand));
#endif

	meshcount = 0;

	//uniform buffers have a certain granularity requirement
	ub_ofs = (ub_ofs + ubo_alignment) & ~ubo_alignment;
	if (ub_ofs+sizeof(struct entityuniform_s)*ubo_maxbatchsize > ub_size)
	{	//no room for another full batch, so move on to the next page.
#ifdef USEGL
		//fence the page we're leaving, so we know when the gpu is done with it by the time we come back around.
		//it may still be holding an older fence (InitModels gives every page one), which must be released instead of overwritten.
		if (ub_page[ub_curpage].sync)
			pglDeleteSync(ub_page[ub_curpage].sync);
		ub_page[ub_curpage].sync = pglFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
#endif

		ub_curpage = (ub_curpage+1) % ub_pages;
		ub_ptr = ub_page[ub_curpage].ptr;
#ifdef USEGL
		ub_buf = ub_page[ub_curpage].buf;
#endif
		ub_ofs = 0;

#ifdef USEGL
		if (ub_page[ub_curpage].sync)
		{
			//wait until we can start writing into the next buffer.
			pglClientWaitSync(ub_page[ub_curpage].sync, GL_SYNC_FLUSH_COMMANDS_BIT, GL_TIMEOUT_IGNORED);

			//that fence has served its purpose.
			pglDeleteSync(ub_page[ub_curpage].sync);
			ub_page[ub_curpage].sync = NULL;
		}
#endif
		//NOTE(review): the vulkan path does no equivalent wait before reusing a page.
	}
	ub_batch_ofs = ub_ofs;
	dib_batch_ofs = dib_ofs;
}

struct entityuniform_s *mdlbuf::AddModel(int firstvert, int firstidx, int numidx, rstate_t *scene)
{
#ifdef USEGL
	struct indirectdrawcmd_s *cmd;
#endif
#ifdef USEVK
	VkDrawIndexedIndirectCommand *cmd;
#endif

	struct entityuniform_s *ret;

	if (dib_ofs+sizeof(*cmd) > dib_size)
	{
		FlushModels();
		dib_batch_ofs = dib_ofs = 0;
	}
	if (meshcount >= ubo_maxbatchsize)
		FlushModels();

	if (!meshcount)
	{
#ifdef USEVK
		FillBufferSync(&sceneubo, 0, sizeof(scene->viewproj.data), scene->viewproj.data);
#endif
#ifdef USEGL
		if (!sceneub_buf)
			pglGenBuffers(1, &sceneub_buf);
		pglBindBuffer(GL_UNIFORM_BUFFER, sceneub_buf);
		pglBufferData(GL_UNIFORM_BUFFER, sizeof(scene->viewproj.data), (void*)scene->viewproj.data, GL_DYNAMIC_DRAW);
#endif
	}

#ifdef USEGL
	cmd = (struct indirectdrawcmd_s*)(dib_ptr+dib_ofs);

	cmd->count = numidx;
	cmd->instanceCount = 1;
	cmd->firstIndex = firstidx;
	cmd->baseVertex = firstvert;
	cmd->baseInstance = meshcount;
#endif
#ifdef USEVK
	cmd = (VkDrawIndexedIndirectCommand*)(dib_ptr+dib_ofs);

	cmd->indexCount = numidx;
	cmd->instanceCount = 1;
	cmd->firstIndex = firstidx;
	cmd->vertexOffset = firstvert;
	cmd->firstInstance = meshcount;
#endif

	dib_ofs += sizeof(*cmd);
	meshcount += 1;

	ret = (struct entityuniform_s*)(ub_ptr + ub_ofs);
	ub_ofs += sizeof(*ret);
	return ret;
}






//Quake 1 .mdl model. The mesh data itself is handed to the shared mdlbuf at load time;
//this object records the handles/offsets that were returned for its pieces.
//NOTE(review): 'class ... : model_c' is private inheritance by C++ default - confirm that is intended.
class q1mdlmodel : model_c
{
private:
	mdlbuf *batch;	//buffer object that draws are queued into (set by LoadModel)
	//one animation entry from the file: either a single pose or a group of poses.
	typedef struct {
		unsigned int numframes;	//number of poses in this group (1 for a single-pose frame)
		float rate;		//first interval value read from the file for groups, 1 otherwise
		GLuint framedata;	//value returned by mdlbuf::AllocVertexes for this group's poses
	} framegroup_t;
	framegroup_t *framegroups;	//numframegroups entries
	//one skin entry from the file: either a single skin or a group of skins.
	typedef struct {
		unsigned int numskins;	//number of subframes
		float rate;		//first interval value read from the file for groups, 1 otherwise

		struct skinsubframe_s *subframe;	//numskins entries, each returned by mdlbuf::AllocTexture
	} skingroup_t;
	skingroup_t *skingroup;	//numskingroups entries
	unsigned int numverts;		//verts per pose as uploaded: twice the file's vertex count
	unsigned int numindexes;	//three per triangle
	unsigned int numframegroups;
	unsigned int numskingroups;

	GLuint texcoords;	//value returned by mdlbuf::AllocTexcoords
	GLuint indexes;		//value returned by mdlbuf::AllocIndexes
	//Returns a scratch array with room for at least 'count' vec3s.
	//The array is held in function-local statics: it is shared by every caller, only ever grows,
	//and is never released. A call that needs to grow it frees the previous array, so older
	//pointers become invalid and previous contents are not carried over.
	//Returns NULL if nothing has been allocated yet and count is 0.
	vec3 *TempVerts(unsigned int count)
	{
		static vec3 *oldbuffer;
		static unsigned int oldcount;
		if (oldcount < count)
		{
			//allocated with new[], so it has to be released with delete[].
			delete[] oldbuffer;
			//forget the old array before allocating, so the statics are never left dangling if new throws.
			oldbuffer = NULL;
			oldcount = 0;

			oldbuffer = new vec3[count];
			oldcount = count;
		}

		return oldbuffer;
	}

#ifdef USEBINDLESS
	//Picks the skin for an entity and returns its texture handle.
	//re->skinnum selects the skin group (wrapped to the number of groups), and re->skintime/rate
	//selects the subframe within that group (wrapped to the group's skin count).
	//Returns 0 when the model has no skins at all, or the chosen group is empty.
	GLuint64 ChooseSkin(rentity_t *re)
	{
		int skinnum;
		int subframe = 0;
		skingroup_t *sg;

		if (!numskingroups)
			return 0;	//nothing to pick from, and the modulo below would divide by zero.

		skinnum = re->skinnum%numskingroups;
		sg = &skingroup[skinnum];
		if (!sg->numskins)
			return 0;	//empty group: subframe[0] does not exist.

		//a single skin is always subframe 0. a non-positive rate cannot be used as a divisor, so stay on subframe 0.
		if (sg->numskins > 1 && sg->rate > 0)
		{
			subframe = re->skintime/sg->rate;
			subframe %= sg->numskins;
		}

		return sg->subframe[subframe].texhandle;
	}
#else
	//Picks the skin for an entity and returns its texture layer.
	//re->skinnum selects the skin group (wrapped to the number of groups), and re->skintime/rate
	//selects the subframe within that group (wrapped to the group's skin count).
	//Returns 0 when the model has no skins at all, or the chosen group is empty.
	float ChooseSkin(rentity_t *re)
	{
		int skinnum;
		int subframe = 0;
		skingroup_t *sg;

		if (!numskingroups)
			return 0;	//nothing to pick from, and the modulo below would divide by zero.

		skinnum = re->skinnum%numskingroups;
		sg = &skingroup[skinnum];
		if (!sg->numskins)
			return 0;	//empty group: subframe[0] does not exist.

		//a single skin is always subframe 0. a non-positive rate cannot be used as a divisor, so stay on subframe 0.
		if (sg->numskins > 1 && sg->rate > 0)
		{
			subframe = re->skintime/sg->rate;
			subframe %= sg->numskins;
		}

		return sg->subframe[subframe].layer;
	}
#endif
	//Works out which two poses get blended for an entity, and with what weights.
	//re: supplies anim[], animtime[] and animweight[] for each of the MAXANIMS animation slots.
	//e:  receives framepos[0..1] (pose offsets, based on the values AllocVertexes returned at load)
	//    and frameweight[0..1].
	//Only two poses can be output. If more than two end up with a non-zero weight (eg: two
	//multi-pose groups both mid-lerp), the two heaviest are kept and rescaled to the original total.
	void ChooseFrame(rentity_t *re, struct entityuniform_s *e)
	{
#define MAXANIMS 2
		unsigned int frame, subframe, numsubframes;

		GLuint frames[MAXANIMS*2];	//ssbo offsets.
		float frameweights[MAXANIMS*2];
		float weight, framepos, frac;
		unsigned int numinfluences = 0;
		unsigned int kept;

		unsigned int i, j;
		if (numframegroups <= 0)
		{
			e->frameweight[0] = e->frameweight[1] = 0;
			e->framepos[0] = e->framepos[1] = 0;
			return;
		}

		//note that input sequence weights are not clamped to a total of 1
		//this allows multiple animations on a single model, assuming they are totally separate anims (with all other verts not animated by that anim set to 0)...
		//yeah, right, like that's ever going to be useful!
		//but it is at least consistent with future skeletal model formats.

		for (i = 0; i < MAXANIMS; i++)
		{
			frame = re->anim[i]%numframegroups;
			numsubframes = framegroups[frame].numframes;
			weight = re->animweight[i];
			if (numsubframes <= 1)
			{
				//single pose, nothing to interpolate.
				frames[numinfluences] = framegroups[frame].framedata;
				frameweights[numinfluences] = weight;
				numinfluences++;
			}
			else
			{
				//position within the group, measured in poses. a rate that isn't positive can't be divided by.
				framepos = 0;
				if (framegroups[frame].rate > 0)
					framepos = re->animtime[i]/framegroups[frame].rate;
				if (!(framepos > 0))	//also catches nan
					framepos = 0;
				subframe = framepos;
				//how far we are from 'subframe' towards the next pose, 0 to 1. must be a float or it is always 0.
				frac = framepos - subframe;

				frames[numinfluences] = framegroups[frame].framedata+((subframe+0)%numsubframes)*numverts;
				frameweights[numinfluences] = weight*(1-frac);
				numinfluences++;
				frames[numinfluences] = framegroups[frame].framedata+((subframe+1)%numsubframes)*numverts;
				frameweights[numinfluences] = weight*frac;
				numinfluences++;
			}
		}

		//clear out any that we're not using
		kept = 0;
		for (i = 0; i < numinfluences; i++)
		{
			if (!frameweights[i])
				continue;
			frameweights[kept] = frameweights[i];
			frames[kept] = frames[i];
			kept++;
		}
		numinfluences = kept;

		//only two poses can be passed on. keep the two heaviest, and preserve the total weight.
		if (numinfluences > 2)
		{
			float total = 0, remaining;
			for (i = 0; i < numinfluences; i++)
				total += frameweights[i];

			//move the two largest weights into slots 0 and 1.
			for (i = 0; i < 2; i++)
			{
				unsigned int best = i;
				for (j = i+1; j < numinfluences; j++)
				{
					if (frameweights[j] > frameweights[best])
						best = j;
				}
				if (best != i)
				{
					float tw = frameweights[i];
					GLuint tf = frames[i];
					frameweights[i] = frameweights[best];
					frames[i] = frames[best];
					frameweights[best] = tw;
					frames[best] = tf;
				}
			}
			numinfluences = 2;

			remaining = frameweights[0] + frameweights[1];
			if (remaining)
			{
				frameweights[0] *= total/remaining;
				frameweights[1] *= total/remaining;
			}
		}

		switch(numinfluences)
		{
		case 0:
			//nothing has any weight, fall back on the first pose of the first group.
			e->framepos[0] = e->framepos[1] = framegroups[0].framedata;
			e->frameweight[0] = 1;
			e->frameweight[1] = 0;
			break;
		case 1:
			e->framepos[0] = e->framepos[1] = frames[0];
			e->frameweight[0] = frameweights[0];
			e->frameweight[1] = 0;
			break;
		default:	//always 2 by now, due to the clamp above.
			e->framepos[0] = frames[0];
			e->framepos[1] = frames[1];
			e->frameweight[0] = frameweights[0];
			e->frameweight[1] = frameweights[1];
			break;
		}
	}

public:
	//Queues this model for drawing as the given entity.
	//Asks the batch for a draw command + uniform slot, then fills the slot with the entity's
	//matrix, its skin, the base vertex, and the pose blend chosen by ChooseFrame.
	void DrawEntity(rentity_t *ent, rstate_t *scene)
	{
		struct entityuniform_s *slot = batch->AddModel(texcoords, indexes, numindexes, scene);

		memcpy(slot->matrix, ent->matrix.data, sizeof(slot->matrix));
		slot->basevert = texcoords;
#ifdef USEBINDLESS
		slot->texture = ChooseSkin(ent);
		slot->pad2 = 0;
#else
		slot->texlayer = ChooseSkin(ent);
		slot->pad2[0] = slot->pad2[1] = 0;
#endif
		ChooseFrame(ent, slot);
	}

	//Parses an in-memory Quake 1 .mdl (version 6) and uploads its skins, texcoords, indexes and
	//poses into the shared model buffer (created on first use).
	//name:     not used here.
	//filedata: the raw file contents, starting with the q1mdl_header_t.
	//length:   size of filedata in bytes. only the header is checked against it.
	//Returns the new model, or NULL (after printing an error) if the file is rejected.
	//Malformed skin/frame group types are reported through Sys_Error instead.
	//NOTE(review): past the header, the skin/texcoord/triangle/pose data is still read without bounds checks against 'length'.
	static model_c *LoadModel(const char *name, unsigned char *filedata, unsigned int length)
	{
		q1mdlmodel *m;
		q1mdl_header_t *header = (q1mdl_header_t*)filedata;
		int numelements;
		int elementnum;
		int vertnum;
		int numverts;
		mdlbuf *batch;

#ifdef USEGL
		if (!pglFenceSync)
		{
			Com_Printf(PRINT_ERROR, "Missing GL extensions, unable to render mdls\n");
			return NULL;
		}
#endif

		//the header is read directly out of the file buffer, so it has to be entirely present.
		if (!filedata || length < sizeof(*header))
		{
			Com_Printf(PRINT_ERROR, "Q1 model is truncated\n");
			return NULL;
		}

		if (header->version != 6)
		{
			Com_Printf(PRINT_ERROR, "Q1 models version %i are not supported\n", header->version);
			return NULL;
		}

		//negative counts must be rejected too, they would otherwise be used as array sizes below.
		if (header->numskins < 0 || header->numtris < 0 || header->numverts < 0 || header->numanims < 0 ||
			header->numskins > 4096 || header->numtris > 65536 || header->numverts > 65536 || header->skinwidth < 4 || header->skinheight < 4 || header->skinwidth > 4096 || header->skinheight > 4096)
		{
			Com_Printf(PRINT_ERROR, "Q1 model sanity limit exceeded\n");
			return NULL;
		}

		m = new q1mdlmodel();

		m->numframegroups = 0;
		m->numindexes = 0;
		m->numverts = 0;
		m->numskingroups = 0;

		//cull radius is the largest extent on any single axis, from the header's scale+translate.
		m->cullradius = 0;
		for (unsigned int i = 0; i < 3; i++)
		{
			if (m->cullradius < header->scale[i]*255 + header->translate[i])
				m->cullradius = header->scale[i]*255 + header->translate[i];
			if (m->cullradius < -header->translate[i])
				m->cullradius = -header->translate[i];
		}

		if (!modelbuffer)
		{
			modelbuffer = new mdlbuf();
			modelbuffer->InitModels();
		}
		batch = modelbuffer;
		m->batch = batch;

		unsigned char *quakepalette = FS_ReadFile("gfx/palette.lmp", NULL);
		if (!quakepalette)
			Sys_Error("Q1 models require a Q1 palette");

		//skins
		//each starts with a type word: 0 is a single skin, 1 is a group (count, then one float interval per skin).
		//the skin pixels (skinwidth*skinheight bytes each) follow.
		unsigned int *skintype = (unsigned int*)(header + 1);
		numelements = m->numskingroups = header->numskins;
		m->skingroup = new skingroup_t[numelements];
		for (elementnum = 0; elementnum < numelements; elementnum++)
		{
			unsigned char *skindata;
			if (*skintype == 0)
			{
				skindata = (unsigned char *)(skintype+1);
				m->skingroup[elementnum].numskins = 1;
				m->skingroup[elementnum].rate = 1;
			}
			else if (*skintype == 1)
			{
				m->skingroup[elementnum].numskins = skintype[1];
				float *intervals = (float *)(skintype+2);
				m->skingroup[elementnum].rate = *intervals;	//only the first interval is used
				skindata = (unsigned char *)(intervals+m->skingroup[elementnum].numskins);
			}
			else
				Sys_Error("Skin group type not supported\n");

			m->skingroup[elementnum].subframe = new skinsubframe_s[m->skingroup[elementnum].numskins];
			for (unsigned int j = 0; j < m->skingroup[elementnum].numskins; j++)
			{
				m->skingroup[elementnum].subframe[j] = batch->AllocTexture(skindata, quakepalette, header->skinwidth, header->skinheight);
				skindata += header->skinwidth * header->skinheight;
			}
			//the next skin (or the texcoords, after the last one) follows directly.
			skintype = (unsigned int*)skindata;
		}

		//NOTE(review): this must match however FS_ReadFile allocates its result - confirm.
		delete quakepalette;

		//s/t
		//FIXME: this generates redundant vertices, which bloats the size of the per-frame data.
		//every vertex is emitted twice: the first copy as-is, the second with the seam offset (half the skin width) applied if it is on the seam.
		numelements = header->numverts;
		m->numverts = numelements*2;
		qmdl_texcoord_t *stin = (qmdl_texcoord_t*)skintype;
		vec2 *stout = new vec2[m->numverts];
		for (elementnum = 0; elementnum < numelements; elementnum++, stin++)
		{
			stout[elementnum][0] = stin->s / (float)batch->skinwidth;
			stout[elementnum][1] = stin->t / (float)batch->skinheight;
			stout[elementnum+numelements][0] = (stin->s+(stin->onseam?header->skinwidth/2.0:0)) / (float)batch->skinwidth;
			stout[elementnum+numelements][1] = stin->t / (float)batch->skinheight;
		}
		m->texcoords = batch->AllocTexcoords(stout, m->numverts);
		delete[] stout;

		//triangles
		//front-facing triangles index the first copy of the verts, the others index the second copy.
		//the vertex order is reversed on the way through.
		qmdl_triangle_t *triin = (qmdl_triangle_t*)stin;
		numelements = header->numtris;
		m->numindexes = numelements*3;
		unsigned int *triout = new unsigned int[m->numindexes];
		for (elementnum = 0; elementnum < numelements; elementnum++, triin++)
		{
			unsigned int *tri = triout + elementnum*3;
			unsigned int copyofs = triin->facesfront ? 0 : header->numverts;

			assert(triin->vertex[0] < header->numverts);
			assert(triin->vertex[1] < header->numverts);
			assert(triin->vertex[2] < header->numverts);

			tri[2] = triin->vertex[0] + copyofs;
			tri[1] = triin->vertex[1] + copyofs;
			tri[0] = triin->vertex[2] + copyofs;
		}
		m->indexes = batch->AllocIndexes(triout, m->numindexes);
		delete[] triout;

		//frames
		//each starts with a type word: 0 is a single pose, 1 is a group (group header, then one float interval per pose).
		unsigned int *frametype = (unsigned int*)(triin);
		numelements = m->numframegroups = header->numanims;
		m->framegroups = new framegroup_t[numelements];
		for (elementnum = 0; elementnum < numelements; elementnum++)
		{
			qmdl_pose_t *posedata;
			if (*frametype == 0)
			{
				m->framegroups[elementnum].numframes = 1;
				m->framegroups[elementnum].rate = 1;

				posedata = (qmdl_pose_t*)(frametype+1);
			}
			else if (*frametype == 1)
			{
				qmdl_group_t *grp = (qmdl_group_t*)(frametype+1);
				assert(grp->numframes < 256);	//sanity
				if (grp->numframes < 1)
					Sys_Error("Frame group with no poses\n");
				m->framegroups[elementnum].numframes = grp->numframes;

				float *intervals = (float*)(grp+1);

				m->framegroups[elementnum].rate = *intervals;	//only the first interval is used

				posedata = (qmdl_pose_t*)(intervals+m->framegroups[elementnum].numframes);
			}
			else
			{
				Sys_Error("bad Frame group type\n");
				break;
			}

			//4 floats per vert, m->numverts (both copies) verts per pose.
			float *outvert = new float[4*m->numverts*m->framegroups[elementnum].numframes];
			float *out = outvert;

			numverts = header->numverts;
			for (unsigned int subframe = 0; subframe < m->framegroups[elementnum].numframes; subframe++)
			{
				//the packed verts follow the pose's own header.
				qmdl_vertex_t *invert = (qmdl_vertex_t*)(posedata+1);
				for (vertnum = 0; vertnum < numverts; vertnum++, invert++)
				{
					//expand to floats, writing the same position into both copies of the vertex.
					float *first = out + (vertnum<<2);
					float *second = out + ((vertnum+numverts)<<2);
					for (unsigned int axis = 0; axis < 3; axis++)
						second[axis] = first[axis] = header->translate[axis] + header->scale[axis]*invert->v[axis];
					second[3] = first[3] = 1;
				}
				out += numverts*8;
				posedata = (qmdl_pose_t*)invert;
			}

			//total verts uploaded for the group: both copies, for every pose.
			numverts *= m->framegroups[elementnum].numframes;
			numverts *= 2;
			m->framegroups[elementnum].framedata = batch->AllocVertexes(outvert, numverts);

			delete[] outvert;

			frametype = (unsigned int*)(posedata);
		}

#ifdef USEGL
		pglBindVertexArray(0);
#endif
		return m;
	}
};

//Hooks q1mdlmodel::LoadModel up as a model type, under the description "q1 mdl".
//The first argument is the four bytes "IDPO" read as an unsigned int, presumably matched against the start of the file (confirm against modeltype).
modeltype q1mdl(*(unsigned int*)"IDPO", q1mdlmodel::LoadModel, "q1 mdl");

#endif