//Certain sections of this file are likely copyright by id.
//This was coded by a guy who has previously looked at the q1 source in a fair amount of depth.
//This file should thus be considered GPL, even though nothing was (directly) copied.

//It definitely has a dependency on q1's qbsp/light/vis tool source headers (considered a statement of fact under copyrights, iiuc).


#include "common.h"
#include "renderer.h"
#ifdef USEGL
#include "glrenderer.h"
#endif
#ifdef USEVK
#include "vkrenderer.h"
#endif
#include "filesystem.h"

#include "bspdefs.h"
#include <malloc.h>

//#define NOMERGETEXTURES	//allows combining multiple world textures into a single shape. otherwise its more expensive

//One element of scenestate_s::materials (only present when USEBINDLESS is defined).
//The GLSL uniform blocks declare the matching array element as two sampler2DArray members named 'd' and 'pad'.
struct gmaterials_s
{
	uint64_t d;		//set from shape_s::diffuse_handle by DrawEntitySubmodel
	uint64_t pad;	//not written by DrawEntitySubmodel
};





//Render modes for the world model. bspmodel::WalkNodes reads scene->mode as one of these values.
//The *_NV modes only exist when USEBINDLESS is defined.
//MODE_MAX is the number of modes; WalkNodes treats any value >= MODE_MAX as MODE_PVSONLY_MULTI.
enum
{
	MODE_PVSONLY_MULTI,
	//no frustum checks, pvs only, cached between frames.
	//no depth sorting.
	//when porting this to an existing quake engine, the lack of bindless textures will result in extra draw calls, which the engine will need to deal with somehow.
	//this implementation already splits batches by texture, so we can use multidrawelementsindirect, so that each texture is its own (effective) draw call. this makes ATI's dynamically-uniform-only bindless-textures happy.

	MODE_WHOLEWORLD_MULTI,
	//entire world (no pvs / frustum)
	//no depth sorting.
	//sounds bad, but the vertex data is nicely ordered and cache friendly, and it doesn't need to call GetLeafAtOrg each frame.

#ifdef USEBINDLESS
	MODE_PVSONLY_NV,
	//exactly like MODE_PVSONLY_MULTI, except that everything is thrown into a single blob. this means that different 'shapes' are all mixed up, which violates ATI's more limited bindless texture support.
	MODE_FRUSTUM_NV,
	//quake1-style frustum stuff, frustum checks, dynamic every frame.
	//surfaces are output in nearest-first ordering (to verify: disable depth testing and then the furthest will draw over the nearest).
	//because surfaces are fully depth sorted, this breaks ati bindless texture requirements.
	MODE_WHOLEWORLD_NV,
	//entire world, no culling at all
	//takes one draw per texture shape.
#endif
	MODE_MAX
};

//CheckGLError(): with USEGL, prints any pending GL error code together with the file and line of the call site. Without USEGL it expands to nothing.
#ifdef USEGL
#define CheckGLError() do { GLenum er = pglGetError(); if (er) Com_Printf(PRINT_ERROR, "Error %x at %s %i\n", er, __FILE__, __LINE__); } while(0)
#else
#define CheckGLError()
#endif
//assert(cond): calls Sys_Error with the line and file name when cond is false.
//The space before __FILE__ is required: C++11 and later parse "literal"__FILE__ as a string literal carrying a user-defined-literal suffix instead of two adjacent strings.
#define assert(cond) while (!(cond)) Sys_Error("Assertion failed on line %i in " __FILE__, __LINE__)

#ifdef USEGL
#define GLSLVER "#version 140\n"	//3.1

//GLSL vertex shader source, assembled from adjacent string literals.
//Transforms v_position by 'mvp' and forwards the material ids and lightstyles (both flat) plus one vec4 of coordinates to the fragment stage.
//The MatrixBlock declaration must stay identical to the one in fragmentprogram; struct scenestate_s is the C-side copy of it.
static const char *vertexprogram =
GLSLVER
#ifdef USEBINDLESS
"#extension GL_ARB_bindless_texture : require\n"
#endif
"layout(std140) uniform MatrixBlock"
"{"
	"mat4 mvp;"
	/*
	std140 rule for arrays of scalars or vectors: the base alignment and array stride are set to match the base alignment
	of a single array element, rounded up to the base alignment of a vec4.

	In other words, just use a vec4 instead of floats, because you WILL get padding (and it might even be driver-specific padding thanks to bugs).
	*/
	"vec4 lmscale[256];"
	"vec3 eyepos;"
	"float time;"
#ifdef USEBINDLESS
	"struct"
	"{"
		"sampler2DArray d;"
		"sampler2DArray pad;"
	"} materials[256];"
#endif
"};"
"in vec4 v_position;"
"in vec4 v_texcoord;"
"in uvec4 v_styles;"
"in ivec3 v_material;"
"flat out ivec3 material;"	//lightmap, subshape, shape.
"out vec4 tc;"
"flat out uvec4 styles;"
"void main() {"
	"gl_Position = mvp * vec4(v_position.xyz, 1.0);"
	"material = v_material;"
	//a lightmap index of -2 is what the fragment shader treats as sky: it gets the vertex position instead of texture coordinates.
	"if (material.x == -2)"
		"{tc = v_position;}"
	"else"
		"{tc = v_texcoord;}"
	"styles = v_styles;"
"}";
//GLSL fragment shader source, assembled from adjacent string literals.
//material.x selects the surface type: -1 is a warped ('turb') surface, -2 is sky, anything else is a lightmapped wall using that lightmap layer.
//material.y is the layer within the diffuse texture array; material.z (USEBINDLESS only) selects the entry of materials[].
//The MatrixBlock declaration must stay identical to the one in vertexprogram.
static const char *fragmentprogram =
GLSLVER
#ifdef USEBINDLESS
"#extension GL_ARB_bindless_texture : require\n"
#endif
"layout(std140) uniform MatrixBlock"
"{"
	"mat4 mvp;"
	"vec4 lmscale[256];"
	"vec3 eyepos;"
	"float time;"
#ifdef USEBINDLESS
	"struct"
	"{"
		"sampler2DArray d;"
		"sampler2DArray pad;"
	"} materials[256];"
#endif
"};"
"in vec4 tc;"
"flat in ivec3 material;"	//lightmap, subshape, shape.
"flat in uvec4 styles;"
"out vec4 result;"
#ifdef USEBINDLESS
//NOTE(review): 'lightmaps' has no 'uniform' qualifier in this variant, unlike the non-bindless declaration below - confirm that this is intended.
"sampler2DArray lightmaps;"
#else
"uniform sampler2DArray lightmaps;"
"uniform sampler2DArray tex;"
#endif
"void main() {"

	"int lightmapidx = material.x;"
	"int subshape = material.y;"
#ifdef USEBINDLESS
	"int shape = material.z;"
	"sampler2DArray tex = materials[shape].d;"
#endif

	"if (lightmapidx == -1)"	//hopefully the fact that it's flat will count for something.
	"{"	//turb: each texture coordinate is offset by a sine of the other coordinate plus time
		"vec3 ntc;"
		"ntc.s = tc.s + sin(tc.t+time)*0.125;"
		"ntc.t = tc.t + sin(tc.s+time)*0.125;"
		"ntc.p = subshape;"
		"result = texture(tex, ntc);"
	"}"
	"else if (lightmapidx == -2)"	//hopefully the fact that it's flat will count for something.
	"{"	//sky: tc holds the vertex position here. two layers (subshape and subshape+1) scroll at different rates, the second is added on top of the first using its alpha.
		"vec3 dir = tc.xyz - eyepos.xyz;"
		"dir.z *= 3.0;"
		"dir.xy /= 0.5*length(dir);"
		"vec3 btc = vec3(dir.xy + time*0.03125, subshape);"
		"result = texture(tex, btc);"
		"vec3 ctc = vec3(dir.xy + time*0.0625, subshape+1);"
		"vec4 clouds = texture(tex, ctc);"
		"result.rgb = (result.rgb*(1.0-clouds.a)) + clouds.rgb;"
	"}"
	"else"
	"{"	//walls: tc.xy addresses the diffuse texture, tc.zw the lightmap. the four lightmap channels are weighted by the scale of the surface's four lightstyles.
		"vec4 lm = texture(lightmaps, vec3(tc.zw, lightmapidx));"
		"result = texture(tex, vec3(tc.xy, subshape));"
//		"vec4 fb = texture(tex, vec3(tc.xy, subshape+1));"
		"result.rgb *= dot(lm, vec4(lmscale[styles.x].x,lmscale[styles.y].x,lmscale[styles.z].x,lmscale[styles.w].x));"
//		"result.rgb = (result.rgb*(1.0-fb.a)) + fb.rgb;"
	"}"
"}";

#endif

extern double bench_idxbuf;
extern double bench_draw;
extern double bench_pvs;
extern vid_t *vid;

//CPU-side copy of the shaders' std140 'MatrixBlock' uniform block.
//DrawEntitySubmodel fills one of these on every call and uploads it; the members are declared in the same order as in the GLSL block.
struct scenestate_s
{
	float modelviewproj[1][16];	//'mvp' in the shaders
	vec4 lmscale[256];			//'lmscale': DrawEntitySubmodel only writes element 0 of each vec4, the shader only reads .x
	float eyepos[3];			//'eyepos'
	float rtime;				//'time'

#ifdef USEBINDLESS
	struct gmaterials_s materials[256];	//'materials': indexed by shape
#endif
};

//In-memory texture projection info, referenced by msurf_t::texinfo.
//None of the fields are read by the drawing code in this part of the file.
typedef struct {
	struct
	{
		float norm[3];
		float dist;
	} tplane[2];	//presumably the s and t texture axes - TODO confirm against the loader
	unsigned int texturenum;
	unsigned int flags;
} mtexinfo_t;

//One drawable surface (face) of the bsp.
typedef struct msurf_s {
	unsigned int *firstidx;	//first of this surface's vertex indexes. not owned by the surface
	unsigned int numidx;	//number of indexes starting at firstidx
	mtexinfo_t *texinfo;
	unsigned int multiinstance;	//index of this surface's shape: selects the per-shape index stream/count that the surface is added to
	unsigned int markframe;		//compared against bspmodel::markframe so that a surface is only emitted once per index rebuild

	struct msurf_s *nextvisible;	//not used by the drawing code in this part of the file
} msurf_t;

#define LMSIZE 512

#ifdef USEGL
static GLuint vbo_instanceids;
static const GLuint vbo_instanceids_count = 256;
#endif

class bspmodel : model_c
{
private:

	msurf_t *surfs;
	unsigned int numsurfs;

	mtexinfo_t *texinfo;
	unsigned int numtexinfo;

	enum surftype_e
	{
		SURF_WALL,
		SURF_TURB,
		SURF_SKY
	};

	struct shape_s
	{
		unsigned int width;
		unsigned int height;
		unsigned int depth;
		unsigned int mips;
		surftype_e	type; 

#ifdef USEGL
		GLuint diffuse;
#ifdef USEBINDLESS
		GLuint64 diffuse_handle;
#endif
#endif
#ifdef USEVK
		VkDescriptorSet descriptors;
		vk_image_t image;
		uint32_t batchidxcount;
		uint32_t batchidxfirst;
#endif

		//move these to a proper instance thing, for multidraws.
		size_t numidx;
		size_t firstidx;
	} *shapes;
	unsigned int numshapes;

	struct textures_s
	{
		unsigned int shape;		//which texture array this texture is in.
		unsigned int subshape;	//this texture's layer

		size_t chain_total;
		size_t chain_next;
		char name[16+1];
	} *textures;
	unsigned int numtextures;

	struct rnode_s;
	//One inline model of the bsp. submodels[0] is the world itself (DrawEntitySubmodel sends it through WalkNodes), the others are drawn by BruteFaces.
	//NOTE(review): nothing in this class initialises vao/ebo/numindexes, yet BruteFaces tests sub->vao (GL) or sub->ebo.buffer (Vulkan) to decide whether they still need building - confirm that the loader clears them.
	class submodel : public model_c
	{
	public:
		unsigned int face_first;	//index into bspmodel::surfs of this submodel's first face
		unsigned int face_count;	//number of consecutive faces starting at face_first
		unsigned int visleafs;		//number of pvs bits/leaves walked by WalkLeafsSingle/WalkLeafsMulti
		struct rnode_s *rootnode;	//where GetLeafAtOrg/WalkNodes_r start
#ifdef USEGL
		GLuint vao;		//created on first use by BruteFaces
		GLuint ebo;		//private index buffer, created together with vao
#endif
#ifdef USEBINDLESS
		unsigned int numindexes;	//total number of indexes in ebo
#else
		unsigned int *numindexes;	//one per shape. allocated by BruteFaces
#endif
#ifdef USEVK
		vk_buffer_t ebo;	//private index buffer, created on first use by BruteFaces
#endif

		bspmodel *owner;	//the bsp that this submodel is part of
	public:
		virtual ~submodel(void)
		{
		}
		//forwards to the owning bsp, which holds all the shared geometry and gpu state.
		virtual void DrawEntity(rentity_t *ent, rstate_t *scene)
		{
			owner->DrawEntitySubmodel(this, ent, scene);
		}
	} *submodels;
	unsigned int numsubmodels;

	//A node of the bsp tree. Leaves (rleaf_s) derive from this and so share the same header.
	struct rnode_s
	{
		int contents;	//0 for an internal node. GetLeafAtOrg stops at any non-zero value, WalkNodes_r treats negative values as leaves

		unsigned int numsurfs;	//number of entries in surf
		msurf_t **surf;

		rnode_s *child[2];	//[0] is taken when a point is on the positive side of the plane, [1] otherwise (see GetLeafAtOrg)

		vec3 mins, maxs;	//bounds, tested against the view frustum by WalkNodes_r

		vec3 planenormal;
		vec planedist;

#ifdef USEBINDLESS
		unsigned int visframe;	//equals bspmodel::visframe when this node is inside the current pvs
		struct rnode_s *parent;	//used to flag nodes as within the pvs, when sorting surfaces by depth.
#endif
	};
	struct rnode_s *nodes;
	struct rleaf_s:rnode_s
	{
		unsigned char *vis;	//compressed
	} *leaves;
	unsigned int numleaves;

	struct rleaf_s *lastviewleaf;
	unsigned int pvsrowbytes;
	unsigned char *pvs;
	unsigned int pvslumpsize;
	unsigned int visframe;

	unsigned char *lightmapsrc;
	size_t lightmapsize;

	vec3 relativeviewofs;

	unsigned int markframe;	//leaves mark the surfaces specified by parent nodes.
					//this allows a surface to be in multiple leaves and still only be drawn once.
					//the deepest common node

	unsigned int **ridx_ati;
	unsigned int *ridx;
	unsigned int idxcount;

	unsigned int dib_entries;
#ifndef USEBINDLESS
	struct shape_s **dib_shape;
#endif
#ifdef USEGL
	GLuint dibo;
	GLuint vao;
	GLuint vbo;
	GLuint ebo;
	GLhandleARB shader;
#endif
#ifdef USEVK
	vk_buffer_t ebo;
	vk_buffer_t vbo;
#endif

	unsigned int numverts;
	unsigned int numidx;
	struct vbodata_s
	{
		float org[3];	//xyz
		int lmidx[3];	//lightmap, material, shape
		float st[4];	//base st, lightmap st
		unsigned char lightstyle[4];
	} *vbodata;
	unsigned int *idxdata;

	//this lot is used temporarily
	struct loadtemp_s {
		miptex_t *miptex;
		unsigned int nummiptex;

		bsp_vertex_t *verts;
		unsigned int numverts;

		void *edges;
		unsigned int numedges;

		signed int *edgerefs;
		unsigned int numedgerefs;

		unsigned int numnodes;

		void *facerefs;
		unsigned int numfacerefs;

		bsp_plane_t *planes;
		unsigned int numplanes;

		struct miptexinfo_s 
		{
			unsigned int width;
			unsigned int height;
			surftype_e	type;
		} *miptexinfo;
	} loadtemp;

#ifdef USEGL
	GLuint lightmaptexture;
#endif
	unsigned int lmwidths[LMSIZE];
	typedef struct {
		unsigned char lmblock[LMSIZE*LMSIZE*4];
	} lm_t;
	lm_t **lms;
	unsigned int numlightmaps;

	//Expands one run-length-compressed visibility row into a plain bitmask of pvsrowbytes bytes.
	//  in:  the compressed row, or NULL for a leaf without vis info (every bit is then set).
	//  out: destination buffer, must have room for at least pvsrowbytes bytes.
	//Encoding: a non-zero byte is copied as-is, a zero byte is followed by a count byte giving the length of the run of zero bytes.
	//Runs are clamped to the space that is left in the row, so a corrupt row cannot write past the end of the caller's buffer.
	void DecompressVis(unsigned char *in, unsigned char *out)
	{
		unsigned char *end = out + pvsrowbytes;

		if (in == NULL)
		{	//leaves without any pvs info can see everything.
			while (out < end)
				*out++ = 0xff;
			return;
		}

		while (out < end)
		{
			int b = *in;
			if (b)
			{
				in++;
				*out++ = (unsigned char)b;
				continue;
			}

			//a 0 is always followed by the count of 0s.
			size_t run = in[1];
			size_t room = (size_t)(end - out);
			in += 2;
			if (run > room)
				run = room;
			memset(out, 0, run);
			out += run;
		}
	}

	//Walks down the tree from rootnode and returns the leaf that contains org.
	//Internal nodes have contents==0; the walk stops at the first node with non-zero contents.
	struct rleaf_s *GetLeafAtOrg(struct rnode_s *rootnode, vec3 org)
	{
		struct rnode_s *cur = rootnode;
		while (cur->contents == 0)
		{
			//child 1 when org is on or behind the splitting plane, child 0 otherwise.
			int behind = ((cur->planenormal * org) - cur->planedist) <= 0;
			cur = cur->child[behind];
		}
		return (struct rleaf_s*)cur;
	}

#ifdef USEBINDLESS
	//draws near surfaces first
	void WalkNodes_r(struct rnode_s *n, rstate_t *scene)
	{
		int side;

		if (n->visframe != this->visframe)
			return;

		//most qw engines have some extra bitflags to avoid frustum plane checks
		if (!scene->frustum.Contains(n->mins, n->maxs))
			return;

		if (n->contents < 0)
		{	//it's a leaf
			unsigned int i;
			for (i = 0; i < n->numsurfs; i++)
			{
				n->surf[i]->markframe = markframe;
			}
			return;
		}

		side = ((n->planenormal * relativeviewofs) - n->planedist)<0;

		WalkNodes_r(n->child[side], scene);

		unsigned int i;
		msurf_t *s;
		for (i = 0; i < n->numsurfs; i++)
		{	//if a child leaf marked it, add it to the drawing list
			//(we can't add it earlier, as a surface can be listed in multiple child nodes)
			//plus doing it here ensures a nearest-first ordering, which helps with early-z stuff.
			s = n->surf[i];
			if (s->markframe == markframe)
			{
				//maybe we shouldn't do this here, for cache reasons.
				//or maybe we should just have a local buffer for ridx
				for (unsigned int j = 0; j < s->numidx; j++)
					*ridx++ = s->firstidx[j];
			}
		}

		WalkNodes_r(n->child[!side], scene);
	}
#endif

	//draws near surfaces first
	void WalkLeafsSingle(unsigned char *pvs)
	{
		int l;
		struct rleaf_s *leaf;
		for (leaf = leaves + submodels->visleafs, l = submodels->visleafs; l-- > 0 ; leaf--)
		{
			//leaf 0 is not in pvs.
			if (pvs[l>>3] & (1u<<(l&7)))
			{
				for (unsigned int i = 0; i < leaf->numsurfs; i++)
				{	//a surface can be present in multiple leafs (when it crosses a node), so avoid adding the same surface multiple times.
					msurf_t *s = leaf->surf[i];
					if (s->markframe != markframe)
					{
						s->markframe = markframe;
						for (unsigned int j = 0; j < s->numidx; j++)
							*ridx++ = s->firstidx[j];
					}
				}
			}
		}
	}
	void WalkLeafsMulti(unsigned char *pvs)
	{
		int l;
		struct rleaf_s *leaf;
		for (leaf = leaves + submodels->visleafs, l = submodels->visleafs; l-- > 0 ; leaf--)
		{
			//leaf 0 is not in pvs.
			if (pvs[l>>3] & (1u<<(l&7)))
			{
				for (unsigned int i = 0; i < leaf->numsurfs; i++)
				{	//a surface can be present in multiple leafs (when it crosses a node), so avoid adding the same surface multiple times.
					msurf_t *s = leaf->surf[i];
					if (s->markframe != markframe)
					{
						s->markframe = markframe;
						for (unsigned int j = 0; j < s->numidx; j++)
							*ridx_ati[s->multiinstance]++ = s->firstidx[j];
					}
				}
			}
		}
	}

#ifdef USEGL
	int oldmode;
	//Draws the world (submodels[0]) in the render mode selected by scene->mode.
	//  ent:   the entity being drawn. If its matrix has a translation (elements 12..14) the model is drawn through BruteFaces instead.
	//  scene: supplies the requested mode, and the frustum that MODE_FRUSTUM_NV culls against.
	//Modes >= MODE_MAX fall back to MODE_PVSONLY_MULTI. The pvs-only modes rebuild their index data only when the view leaf (or the mode) changes.
	//Except on the BruteFaces path, the time spent is reported through bench_pvs, bench_idxbuf and bench_draw.
	//On return no vao and no program are bound.
	inline void WalkNodes(rentity_t *ent, rstate_t *scene)
	{
		int rendermode;
		double time1 = vid->GetTime();
		double time2 = time1;

		rendermode = scene->mode;
		if (ent->matrix[12] || ent->matrix[13] || ent->matrix[14])
		{	//frustum culling cannot deal with translated/rotated models
			rendermode = MODE_WHOLEWORLD_MULTI;
		}
		if (rendermode >= MODE_MAX)
			rendermode = MODE_PVSONLY_MULTI;

		if (rendermode == MODE_WHOLEWORLD_MULTI)
		{	//handled entirely by the brute-force path, which has its own vao and index buffer.
			BruteFaces(submodels, ent, scene);
			return;
		}
		//rendermode can no longer be MODE_WHOLEWORLD_MULTI from here on.

		pglActiveTexture(GL_TEXTURE1);
		pglBindTexture(GL_TEXTURE_2D_ARRAY, lightmaptexture);
		pglActiveTexture(GL_TEXTURE0);

		//switch to the correct vao, we may be updating its element buffer.
		pglBindVertexArray(vao);
		pglUseProgram(shader);

		//one of the other modes may have obliterated it.
		if (oldmode != rendermode)
		{
			lastviewleaf = NULL;
			idxcount = 0;

#ifdef USEBINDLESS
			//update how we stream the various verts
			if (rendermode == MODE_WHOLEWORLD_NV)
				pglBufferData(GL_ELEMENT_ARRAY_BUFFER, numidx*sizeof(*ridx), idxdata, GL_STATIC_DRAW);
			else if (rendermode == MODE_FRUSTUM_NV)
				pglBufferData(GL_ELEMENT_ARRAY_BUFFER, numidx*sizeof(*ridx), NULL, GL_STREAM_DRAW);
			//the pvs-only modes do not respecify the buffer here.
#endif
			oldmode = rendermode;
		}

#ifdef USEBINDLESS
		if (rendermode == MODE_WHOLEWORLD_NV)
		{	//everything from the start of idxdata up to the end of the world's last face.
			const msurf_t *lastsurf = &surfs[submodels[0].face_count-1];
			idxcount = (unsigned int)((lastsurf->firstidx + lastsurf->numidx) - idxdata);
		}
		else if (rendermode == MODE_FRUSTUM_NV)
		{
			struct rleaf_s *viewleaf = GetLeafAtOrg(submodels[0].rootnode, relativeviewofs);
			if (lastviewleaf != viewleaf)
			{
				unsigned int i;
				struct rnode_s *rn;
				unsigned char decompressedvis[(MAX_MAP_LEAFS+7)/8];
				lastviewleaf = viewleaf;
				DecompressVis(viewleaf->vis, decompressedvis);
				visframe++;
				for (i = 0; i+1 < this->numleaves; i++)
				{
					if (decompressedvis[i>>3] & (1<<(i&7)))
					{	//mark the leaf for this frame as well as its parents (but don't bother marking parents if they're already marked)
						for (rn = this->leaves+i+1; rn; rn = rn->parent)
						{
							if (rn->visframe == visframe)
								break;
							rn->visframe = visframe;
						}
					}
				}
			}
			time2 = vid->GetTime();

			//update the element ebo according to vis (note that this would require rebuilding the ebo if pvs+frustum checks were disabled.
			unsigned int *ridxstart = ridx = (unsigned int*)pglMapBufferRange(GL_ELEMENT_ARRAY_BUFFER, 0, numidx * sizeof(*ridx), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_FLUSH_EXPLICIT_BIT);
			if (ridx)
			{	//mapbuffer is technically allowed to fail.
				WalkNodes_r(submodels[0].rootnode, scene);
				markframe++;

				idxcount = (unsigned int)(ridx - ridxstart);
				pglFlushMappedBufferRange(GL_ELEMENT_ARRAY_BUFFER, 0, idxcount * sizeof(*ridx));
				pglUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER);
			}
			else
				idxcount = 0;
		}
		else if (rendermode == MODE_PVSONLY_NV)
		{	//no frustum checks, pvs only, cached between frames.
			//no depth sorting.
			//this could be improved by splitting the world into quadrants or so, which could allow simplistic 90-degree frustum checks or something (would need multidraws or something).
			//when porting this to an existing quake engine, the lack of bindless textures will result in extra draw calls, which the engine will need to deal with somehow.
			struct rleaf_s *viewleaf = GetLeafAtOrg(submodels[0].rootnode, relativeviewofs);
			if (lastviewleaf != viewleaf)
			{
				unsigned char decompressedvis[(MAX_MAP_LEAFS+7)/8];
				lastviewleaf = viewleaf;
				DecompressVis(viewleaf->vis, decompressedvis);
				visframe++;

				time2 = vid->GetTime();

				//update the element ebo according to vis (note that this would require rebuilding the ebo if pvs+frustum checks were disabled.
#if USEMAPBUFFER
				unsigned int *ridxstart = ridx = (unsigned int*)pglMapBufferRange(GL_ELEMENT_ARRAY_BUFFER, 0, numidx * sizeof(*ridx), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_FLUSH_EXPLICIT_BIT);
#else
				unsigned int *ridxstart = ridx = new unsigned int[numidx];
#endif
				if (ridxstart)
				{	//mapbuffer is technically allowed to fail.

					markframe++;
					WalkLeafsSingle(decompressedvis);

					//take the count while ridxstart is still a valid pointer.
					idxcount = (unsigned int)(ridx - ridxstart);

#if USEMAPBUFFER
					pglFlushMappedBufferRange(GL_ELEMENT_ARRAY_BUFFER, 0, idxcount * sizeof(*ridxstart));
					pglUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER);
#else
					pglBufferData(GL_ELEMENT_ARRAY_BUFFER, (GLsizei)(idxcount * sizeof(*ridxstart)), ridxstart, GL_STATIC_DRAW);
					delete[] ridxstart;
#endif
				}
				else
					idxcount = 0;
			}
			else
				time2 = vid->GetTime();
		}
		else
#endif
		{	//MODE_PVSONLY_MULTI
			struct rleaf_s *viewleaf;
			if (numsubmodels == 1)
				viewleaf = leaves;	//put it in the solid leaf, so it opens up pvs and doesn't result in each ammobox rewriting index lists.
			else
				viewleaf = GetLeafAtOrg(submodels[0].rootnode, relativeviewofs);
			if (lastviewleaf != viewleaf)
			{
				unsigned char decompressedvis[(MAX_MAP_LEAFS+7)/8];
				lastviewleaf = viewleaf;
				DecompressVis(viewleaf->vis, decompressedvis);
				visframe++;

				time2 = vid->GetTime();

				//update the element ebo according to vis (note that this would require rebuilding the ebo if pvs+frustum checks were disabled.
#if USEMAPBUFFER
				unsigned int *ridxstart = (unsigned int*)pglMapBufferRange(GL_ELEMENT_ARRAY_BUFFER, 0, numidx * sizeof(*ridx), GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_BUFFER_BIT | GL_MAP_FLUSH_EXPLICIT_BIT);
#else
				unsigned int *ridxstart = new unsigned int[numidx];
#endif
				if (ridxstart)
				{	//mapbuffer is technically allowed to fail.
					//one write cursor per shape, each starting at that shape's range of the buffer.
					//this is stack memory: ridx_ati must not be used once this function has returned.
					ridx_ati = (unsigned int**)_alloca(sizeof(unsigned int*) * numshapes);

					for (size_t i = 0; i < numshapes; i++)
						ridx_ati[i] = ridxstart + shapes[i].firstidx;
					markframe++;
					WalkLeafsMulti(decompressedvis);

					//build the draw commands now, while the cursors and ridxstart are still valid pointers.
					struct indirectdrawcmd_s *indirectdraw = (struct indirectdrawcmd_s*)_alloca(sizeof(*indirectdraw) * numshapes);

					dib_entries = 0;
					for (size_t i = 0; i < numshapes; i++)
					{
						GLuint written = (GLuint)(ridx_ati[i] - (ridxstart + shapes[i].firstidx));
						if (!written)
							continue;	//nothing visible uses this shape
						indirectdraw[dib_entries].count = written;
						indirectdraw[dib_entries].instanceCount = 1;
						indirectdraw[dib_entries].firstIndex = (GLuint)shapes[i].firstidx;
						indirectdraw[dib_entries].baseVertex = 0;
						indirectdraw[dib_entries].baseInstance = 0;
#ifndef USEBINDLESS
						dib_shape[dib_entries] = &shapes[i];
#endif
						dib_entries++;
					}

#if USEMAPBUFFER
					//flush what was written for each shape: the range starts at the shape's first index and covers the indexes written.
					for (size_t i = 0; i < numshapes; i++)
						pglFlushMappedBufferRange(GL_ELEMENT_ARRAY_BUFFER, shapes[i].firstidx*sizeof(*ridxstart), (ridx_ati[i] - (ridxstart + shapes[i].firstidx))*sizeof(*ridxstart));
					pglUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER);
#else
					pglBufferData(GL_ELEMENT_ARRAY_BUFFER, numidx * sizeof(*ridxstart), ridxstart, GL_STATIC_DRAW);
					delete[] ridxstart;
#endif

					if (!dibo)
						pglGenBuffers(1, &dibo);
					pglBindBuffer(GL_DRAW_INDIRECT_BUFFER, dibo);
					pglBufferData(GL_DRAW_INDIRECT_BUFFER, sizeof(*indirectdraw) * dib_entries, indirectdraw, GL_STATIC_DRAW);
				}
			}
			else
				time2 = vid->GetTime();
		}

		double time3 = vid->GetTime();

#ifdef USEBINDLESS
		if (rendermode == MODE_PVSONLY_NV || rendermode == MODE_FRUSTUM_NV || rendermode == MODE_WHOLEWORLD_NV)
		{
			//oh look. it's our single draw call!
			pglDrawElements(GL_TRIANGLES, (idxcount), GL_UNSIGNED_INT, NULL);
		}
		else
#endif
		//if (rendermode == MODE_PVSONLY_MULTI)
		{
			//multi instead of instancing because they have different index buffers (different offsets in a single ebo)
			//I guess we could use instanced drawing with a load of discards... but yuck.
			pglBindBuffer(GL_DRAW_INDIRECT_BUFFER, dibo);	//not part of vaos
			//FIXME: this dibo thing should be regenerated each frame, with its own set of (active) models etc. a per-instance uniform array index with texture+transform would be perfect for submodels.
#ifndef USEBINDLESS
			//without bindless textures each command needs its own texture bound, so they are issued one at a time.
			for (unsigned int i = 0; i < dib_entries; i++)
			{
				struct indirectdrawcmd_s *nul = NULL;
				pglBindTexture(GL_TEXTURE_2D_ARRAY, dib_shape[i]->diffuse);
				pglMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, nul + i, 1, 0);
			}
#else
			pglMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, NULL, dib_entries, 0);
#endif
		}
		double time4 = vid->GetTime();

		bench_draw = time4 - time3;
		bench_idxbuf = time3 - time2;
		bench_pvs = time2 - time1;

		//and in case we do 2d stuff.
		pglBindVertexArray(0);
		pglUseProgram(0);
	}

	//Describes the layout of vbodata_s to the currently bound vao, for each vertex attribute that 'shader' actually has.
	//The vao to update must be bound by the caller (BruteFaces also binds the vbo as GL_ARRAY_BUFFER first). The index buffer binding is not touched.
	//Attributes that the program does not expose (location -1) are skipped.
	//Only vbodata's type is used (for the stride and member offsets).
	void UpdateVAO(void)
	{	//current vao must be bound
		//will not update the index buffer
		GLint r;

		CheckGLError();

		//position: 3 floats
		r = pglGetAttribLocation(shader,  "v_position");
		if (r != -1)
		{
			pglVertexAttribPointer(r, 3, GL_FLOAT, FALSE, sizeof(*vbodata), (void*)((char*)vbodata->org - (char*)vbodata));
			pglEnableVertexAttribArray(r);
		}

		CheckGLError();

		//lightmap, material, shape: 3 ints, passed through as integers
		r = pglGetAttribLocation(shader,  "v_material");
		if (r != -1)
		{
			pglVertexAttribIPointer(r, 3, GL_INT, sizeof(*vbodata), (void*)((char*)vbodata->lmidx - (char*)vbodata));
			pglEnableVertexAttribArray(r);
		}

		CheckGLError();

		//lightstyles: 4 unsigned bytes, passed through as integers
		r = pglGetAttribLocation(shader,  "v_styles");
		if (r != -1)
		{
			pglVertexAttribIPointer(r, 4, GL_UNSIGNED_BYTE, sizeof(*vbodata), (void*)((char*)vbodata->lightstyle - (char*)vbodata));
			pglEnableVertexAttribArray(r);
		}

		CheckGLError();

		//base st + lightmap st: 4 floats
		r = pglGetAttribLocation(shader,  "v_texcoord");
		if (r != -1)
		{
			pglVertexAttribPointer(r, 4, GL_FLOAT, FALSE, sizeof(*vbodata), (void*)((char*)vbodata->st - (char*)vbodata));
			pglEnableVertexAttribArray(r);
		}

		CheckGLError();

		//no per-instance 'v_drawid' attribute (which would be fed from vbo_instanceids) is set up here.
		CheckGLError();
	}

	//draw a model without any regard for pvs
	//The first time a submodel is drawn this builds its private vao and element buffer, holding the indexes of all of its faces
	//(one block when USEBINDLESS is defined, otherwise grouped by shape so that each shape is one draw). The main vbo is shared.
	//The cpu-side index list is only needed for the upload and is freed straight afterwards.
	//ent and scene are not used by this path. On return no vao and no program are bound.
	inline void BruteFaces(submodel *sub, rentity_t *ent, rstate_t *scene)
	{
		if (!sub->vao)
		{
			unsigned int *idx;
			unsigned int f;
			unsigned int ic;
#ifdef USEBINDLESS
			//count, then copy every face's indexes back to back.
			for (f = 0, ic = 0; f < sub->face_count; f++)
			{
				msurf_t *surf = &surfs[sub->face_first + f];
				ic += surf->numidx;
			}
			idx = new unsigned int[ic];
			sub->numindexes = ic;
			for (f = 0, ic = 0; f < sub->face_count; f++)
			{
				msurf_t *surf = &surfs[sub->face_first + f];
				memcpy(idx+ic, surf->firstidx, sizeof(*idx) * surf->numidx);
				ic += surf->numidx;
			}
#else
			unsigned int s;
			unsigned int *firstidx = (unsigned int*)alloca(sizeof(unsigned int) * numshapes);
			sub->numindexes = new unsigned int[numshapes];
			for (s = 0; s < numshapes; s++)
				sub->numindexes[s] = 0;
			//pass 1: count the indexes that each shape needs.
			for (f = 0; f < sub->face_count; f++)
			{
				msurf_t *surf = &surfs[sub->face_first + f];
				sub->numindexes[surf->multiinstance] += surf->numidx;
			}
			//turn the counts into start offsets. the counts are reset so that they can serve as write cursors in pass 2.
			for (s = 0, ic = 0; s < numshapes; s++)
			{
				firstidx[s] = ic;
				ic += sub->numindexes[s];
				sub->numindexes[s] = 0;
			}
			idx = new unsigned int[ic];
			//pass 2: copy each face into its shape's range, which also restores the per-shape counts.
			for (f = 0; f < sub->face_count; f++)
			{
				msurf_t *surf = &surfs[sub->face_first + f];
				memcpy(idx+firstidx[surf->multiinstance]+sub->numindexes[surf->multiinstance], surf->firstidx, sizeof(*idx) * surf->numidx);
				sub->numindexes[surf->multiinstance] += surf->numidx;
			}
#endif

			//generate our vao
			pglGenVertexArrays(1, &sub->vao);
			pglGenBuffers(1, &sub->ebo);
			pglBindVertexArray(sub->vao);
			pglBindBuffer(GL_ELEMENT_ARRAY_BUFFER, sub->ebo);
			pglBindBuffer(GL_ARRAY_BUFFER, vbo);	//reuse the main vbo

			//give it our private idx data
			pglBufferData(GL_ELEMENT_ARRAY_BUFFER, ic*sizeof(*idx), idx, GL_STATIC_DRAW);
			delete[] idx;	//the buffer object has its own copy now

			UpdateVAO();
		}
		else
			pglBindVertexArray(sub->vao);

		pglUseProgram(shader);
		pglActiveTexture(GL_TEXTURE1);
		pglBindTexture(GL_TEXTURE_2D_ARRAY, lightmaptexture);
		pglActiveTexture(GL_TEXTURE0);

#ifdef USEBINDLESS
		pglDrawElements(GL_TRIANGLES, sub->numindexes, GL_UNSIGNED_INT, NULL);
#else
		//one draw per used shape. the shapes' ranges are stored back to back, in shape order.
		size_t firstindex = 0;
		for (unsigned int s = 0; s < numshapes; s++)
		{
			if (sub->numindexes[s])
			{
				pglBindTexture(GL_TEXTURE_2D_ARRAY, shapes[s].diffuse);
				//with an element buffer bound, the last argument is a byte offset into that buffer.
				pglDrawElements(GL_TRIANGLES, sub->numindexes[s], GL_UNSIGNED_INT, (const void*)(firstindex * sizeof(unsigned int)));
				firstindex += sub->numindexes[s];
			}
		}
#endif

		//and in case we do 2d stuff.
		pglBindVertexArray(0);
		pglUseProgram(0);
	}
#endif
#ifdef USEVK
	//Vulkan path: records the commands that draw every face of a submodel, without any visibility culling.
	//The first call for a submodel builds its index buffer, with the indexes grouped by shape so that each shape takes a single indexed draw.
	//scene is not used here.
	inline void BruteFaces(submodel *sub, rentity_t *ent, rstate_t *scene)
	{
		if (!sub->ebo.buffer)
		{
			unsigned int *shapestart = (unsigned int*)alloca(sizeof(unsigned int) * numshapes);
			unsigned int total = 0;

			sub->numindexes = new unsigned int[numshapes];
			for (unsigned int s = 0; s < numshapes; s++)
				sub->numindexes[s] = 0;

			//pass 1: how many indexes does each shape need?
			for (unsigned int f = 0; f < sub->face_count; f++)
			{
				msurf_t *face = &surfs[sub->face_first + f];
				sub->numindexes[face->multiinstance] += face->numidx;
			}

			//convert the counts into start offsets, clearing the counts so that pass 2 can use them as write cursors.
			for (unsigned int s = 0; s < numshapes; s++)
			{
				shapestart[s] = total;
				total += sub->numindexes[s];
				sub->numindexes[s] = 0;
			}

			//pass 2: copy each face's indexes into its shape's range (this restores the counts too).
			unsigned int *packed = new unsigned int[total];
			for (unsigned int f = 0; f < sub->face_count; f++)
			{
				msurf_t *face = &surfs[sub->face_first + f];
				unsigned int &used = sub->numindexes[face->multiinstance];
				memcpy(packed+shapestart[face->multiinstance]+used, face->firstidx, sizeof(*packed) * face->numidx);
				used += face->numidx;
			}

			sub->ebo = CreateBuffer(total*sizeof(*packed), VK_BUFFER_USAGE_INDEX_BUFFER_BIT, false);
			FillBufferSync(&sub->ebo, 0, total*sizeof(*packed), packed);
			delete[] packed;
		}

		//state shared by all of the draws below
		pvkCmdBindDescriptorSets(vkmaincmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeLayout, 0, 1, &descriptors, 0, NULL);
		pvkCmdPushConstants(vkmaincmd, pipeLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(ent->matrix.data), ent->matrix.data);
		pvkCmdBindIndexBuffer(vkmaincmd, sub->ebo.buffer, 0, VK_INDEX_TYPE_UINT32);
		const VkDeviceSize offsets[1] = {0};
		pvkCmdBindVertexBuffers(vkmaincmd, 0, 1, &vbo.buffer, offsets);

		uint32_t boundpermu = ~0;	//no pipeline bound yet
		uint32_t firstindex = 0;
		for (unsigned int s = 0; s < numshapes; s++)
		{
			const uint32_t count = sub->numindexes[s];
			if (!count)
				continue;

			//the pipeline is chosen by the shape's surface type, and is only rebound when that changes.
			if (boundpermu != shapes[s].type)
			{
				boundpermu = shapes[s].type;
				pvkCmdBindPipeline(vkmaincmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline[boundpermu]);
			}
			pvkCmdBindDescriptorSets(vkmaincmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeLayout, 1, 1, &shapes[s].descriptors, 0, NULL);
			pvkCmdDrawIndexed(vkmaincmd, count, 1, firstindex, 0, 0);
			firstindex += count;
		}
	}
#endif

protected:
	//Uploads the per-draw scene constants (see scenestate_s) and then draws one submodel of this bsp.
	//  sub:   the submodel to draw. submodels[0] (the world) goes through WalkNodes, any other one through BruteFaces.
	//  ent:   the entity being drawn; its matrix is combined into the uploaded matrix on the GL path, and its translation is subtracted from the view origin.
	//  scene: supplies the view-projection matrix, view origin, time and lightstyle scales.
	//Texture animation chains (textures_s::chain_*) are not applied here.
	virtual void DrawEntitySubmodel(submodel *sub, rentity_t *ent, rstate_t *scene)
	{
		struct scenestate_s screencbuf;

		//only the first component of each lmscale entry is used by the shader.
		for (size_t i = 0; i < sizeof(screencbuf.lmscale)/sizeof(screencbuf.lmscale[0]); i++)
		{
			screencbuf.lmscale[i].v[0] = scene->lmscale[i];
		}
#ifdef USEVK
		memcpy(screencbuf.modelviewproj[0], scene->viewproj.data, sizeof(float)*16);
#else
		memcpy(screencbuf.modelviewproj[0], (scene->viewproj * ent->matrix).data, sizeof(float)*16);
#endif
		memcpy(screencbuf.eyepos, scene->vieworigin.v, sizeof(screencbuf.eyepos));
		screencbuf.rtime = scene->time;


#ifdef USEBINDLESS
		//the uniform block only has room for a fixed number of materials: never write past its end.
		const size_t maxmaterials = sizeof(screencbuf.materials)/sizeof(screencbuf.materials[0]);
		for (size_t i = 0; i < numshapes && i < maxmaterials; i++)
		{
			screencbuf.materials[i].d = shapes[i].diffuse_handle;
		}
#endif


#ifdef USEGL
		//a single uniform buffer, created on first use, is shared by every bsp model.
		static GLuint cbuf;
		if (!cbuf)
			pglGenBuffers(1, &cbuf);

		pglBindBuffer(GL_UNIFORM_BUFFER, cbuf);
		pglBufferData(GL_UNIFORM_BUFFER, sizeof(screencbuf), &screencbuf, GL_STREAM_DRAW);
		pglBindBufferBase(GL_UNIFORM_BUFFER, 0, cbuf);
#endif
#ifdef USEVK
		void *ubodata = NULL;	//stays NULL if the map call does not provide a pointer
		VkAssert(pvkMapMemory(vkdevice, ubo.memory, 0, sizeof(screencbuf), 0, (void**)&ubodata));
		if (ubodata)
		{
			memcpy(ubodata, &screencbuf, sizeof(screencbuf));
			pvkUnmapMemory(vkdevice, ubo.memory);
		}
#endif
		relativeviewofs = scene->vieworigin;

		//fixme: deal with rotations.
		relativeviewofs[0] -= ent->matrix[12];
		relativeviewofs[1] -= ent->matrix[13];
		relativeviewofs[2] -= ent->matrix[14];

		if (sub == submodels)
			WalkNodes(ent, scene);
		else
			BruteFaces(sub, ent, scene);
	}

public:

	//Draws the whole model for an entity, which means drawing its first submodel.
	virtual void DrawEntity(rentity_t *ent, rstate_t *scene)
	{
		this->DrawEntitySubmodel(submodels, ent, scene);
	}
	friend submodel;
	//Looks up a submodel by index. Returns NULL when idx is not below numsubmodels.
	virtual model_c *GetSubmodel(unsigned int idx)
	{
		model_c *found = NULL;
		if (idx < numsubmodels)
			found = &submodels[idx];
		return found;
	}

	bspmodel(void)
	{
		lastviewleaf = NULL;
		visframe = 0;
		markframe = 0;
		numverts = 0;
		numidx = 0;
		ridx = NULL;

		cullradius = 4096;

		submodels = NULL;

		vbodata = NULL;
		idxdata = NULL;
#ifdef USEGL
		dibo = 0;
#endif

		numlightmaps = 0;
		lms = NULL;
	}
	//Releases the gpu objects and the cpu-side arrays owned by this model.
	virtual ~bspmodel(void)
	{
#ifdef USEVK
		DestroyBuffer(&ebo);
		DestroyBuffer(&vbo);
		DestroyBuffer(&ubo);

		for (uint32_t i = 0; i < countof(pipeline); i++)
			pvkDestroyPipeline(vkdevice, pipeline[i], vkallocationcb);
		for (uint32_t i = 0; i < numshapes; i++)
			DestroyTexture(&shapes[i].image);
		DestroyTexture(&lightmaps);
		//descPools and descSetLayout have the same number of entries, so one loop covers both.
		for (uint32_t i = 0; i < countof(descSetLayout); i++)
		{
			pvkResetDescriptorPool(vkdevice, descPools[i], 0);
			pvkDestroyDescriptorPool(vkdevice, descPools[i], vkallocationcb);
			pvkDestroyDescriptorSetLayout(vkdevice, descSetLayout[i], vkallocationcb);
		}
		pvkDestroyPipelineLayout(vkdevice, pipeLayout, vkallocationcb);
#endif

		delete[] surfs;
		delete[] texinfo;
#ifdef USEGL
		for (unsigned int i = 0; i < numshapes; i++)
		{
			pglDeleteTextures(1, &shapes[i].diffuse);
		}
#endif
		//the shapes array is grown with realloc in LoadTextures, so it has to go back through free (not delete[]).
		//this must come after the loops above, which still read shapes[i].
		free(shapes);
		delete[] textures;	//only the array itself needs freeing
		delete[] nodes;
		delete[] leaves;
		delete[] pvs;
		delete[] lightmapsrc;
		delete[] idxdata;
	}











private:
/*
========================================================================================
	loading code
========================================================================================
*/
	//Remembers where the vertex lump is and how many vertices it holds, for the later loading stages.
	//No copy is made; loadtemp.verts points straight at the caller's data.
	void LoadVerticies(bsp_vertex_t *in, unsigned int lumpsize)
	{
		const size_t stride = sizeof(*in);
		if ((lumpsize % stride) != 0)
			Sys_Error("Funny size of verticies lump");

		loadtemp.verts = in;
		loadtemp.numverts = lumpsize / stride;
	}

	//Remembers where the edge lump is and how many edges it holds. Templated on the on-disk edge type.
	//No copy is made; loadtemp.edges points straight at the caller's data.
	template <typename dedge_t> void LoadEdges(dedge_t *in, unsigned int lumpsize)
	{
		const size_t stride = sizeof(*in);
		if ((lumpsize % stride) != 0)
			Sys_Error("Funny size of edges lump");

		loadtemp.edges = in;	//only needed while loading
		loadtemp.numedges = lumpsize / stride;
	}

	//Remembers where the surfedge lump is and how many edge references it holds.
	//No copy is made; loadtemp.edgerefs points straight at the caller's data.
	void LoadSurfEdges(signed int *in, unsigned int lumpsize)
	{
		const size_t stride = sizeof(*in);
		if ((lumpsize % stride) != 0)
			Sys_Error("Funny size of surfedges lump");

		loadtemp.edgerefs = in;	//only needed while loading
		loadtemp.numedgerefs = lumpsize / stride;
	}

	//Finds frame 'id' of an animated texture: a texture named "+<id><name>" with the given dimensions.
	//name is the part of the texture name after the "+N" prefix.
	//Returns the texture index, or (unsigned)-1 when there is no such frame.
	unsigned int FindAnimTexture(size_t id, char *name, unsigned int width, unsigned int height)
	{
		for (unsigned int t = 0; t < numtextures; t++)
		{
			const char *candidate = textures[t].name;
			if (candidate[0] != '+')
				continue;
			if (candidate[1] != '0'+id)
				continue;	//wrong frame number
			if (strcmp(name, candidate+2))
				continue;	//different animation
			if (loadtemp.miptexinfo[t].width != width)
				continue;
			if (loadtemp.miptexinfo[t].height != height)
				continue;
			return t;
		}
		return -1;
	}

	//Loads the miptex lump.
	// 1) classifies every texture (sky/turb/wall) and groups textures of identical size+type into 'shapes', each of which becomes one 2d array texture.
	// 2) creates the gpu array textures (two layers per texture: diffuse then fullbright).
	// 3) links up '+0'..'+9' animation chains.
	// 4) converts the 8bit palettised level-0 pixels to rgba and uploads them.
	//src: start of the texture lump (a count followed by per-texture offsets, relative to src).
	//lumpsize: size of the lump in bytes. only used by the debug bounds asserts.
	void LoadTextures(bsp_miplump_t *src, unsigned int lumpsize)
	{
		unsigned int i;
		miptex_t *in;
		char *srcend = (char*)src + lumpsize;

		//stand-in for lump slots that have no miptex at all (offset -1).
		//it has no pixel data (offset[0] == 0), so it takes the 'external texture' placeholder path below.
		//it is given a small non-zero size so that its shape is something the gpu can actually allocate.
		miptex_t dummymip;
		memset(&dummymip, 0, sizeof(dummymip));
		dummymip.width = 16;
		dummymip.height = 16;

		unsigned char *quakepalette = FS_ReadFile("gfx/palette.lmp", NULL);
		if (!quakepalette)
		{
			Com_Printf(PRINT_ERROR, "WARNING: NO PALETTE FOUND. Making something up.\n");
			quakepalette = new unsigned char[768];
			for (i = 0; i < 768; i++)
			{	//this is a really lame and crap representation of the quake palette. enjoy.
				quakepalette[i] = ((i/3)&15)<<4;
				if (i >= 768/2)
					quakepalette[i] = 255-quakepalette[i];
			}
		}

		shapes = NULL;
		numshapes = 0;
		numtextures = src->numtex;
		textures = new struct textures_s[numtextures];

		//pass 1: classify each texture and assign it a (shape, layer) slot.
		loadtemp.miptexinfo = new loadtemp_s::miptexinfo_s[numtextures];
		for (i = 0; i < numtextures; i++)
		{
			//missing entries must not be dereferenced - the offset is not a real position within the lump.
			if (src->offset[i] == -1)
				in = &dummymip;
			else
				in = (miptex_t *)((unsigned char *)src + src->offset[i]);
			loadtemp.miptexinfo[i].width = in->width;
			loadtemp.miptexinfo[i].height = in->height;
			if (!_strnicmp(in->name, "sky", 3))
				loadtemp.miptexinfo[i].type = SURF_SKY;
			else if (!_strnicmp(in->name, "*", 1))
				loadtemp.miptexinfo[i].type = SURF_TURB;
			else
				loadtemp.miptexinfo[i].type = SURF_WALL;
			if (loadtemp.miptexinfo[i].type == SURF_SKY)
			{
				//sky textures hold two layers side by side, so the usable width is half. an odd width can't be split.
				if (in->width & 1)
					loadtemp.miptexinfo[i].type = SURF_WALL;	//its not sky...
				else
					loadtemp.miptexinfo[i].width /= 2;
			}

			memcpy(textures[i].name, in->name, 16); textures[i].name[16] = 0;
			textures[i].chain_total = 0;
			textures[i].chain_next = i;

			unsigned int j;
#ifndef NOMERGETEXTURES
			//reuse an existing shape with the same dimensions and type, taking the next free layer in it.
			for (j = 0; j < numshapes; j++)
			{
				if (shapes[j].width == loadtemp.miptexinfo[i].width && shapes[j].height == loadtemp.miptexinfo[i].height && shapes[j].type == loadtemp.miptexinfo[i].type)
				{	//remap the first one's subshape, because we can.
					textures[i].shape = j;
					textures[i].subshape = shapes[j].depth++;
					break;
				}
			}
#else
			j = numshapes;
#endif
			if (j == numshapes)
			{	//nothing suitable, so start a new shape.
				int w;
				int h;
				//grow via a temporary so a failed realloc is reported instead of being dereferenced.
				struct shape_s *grown = (struct shape_s*)realloc(shapes, sizeof(*shapes)*(numshapes+1));
				if (!grown)
					Sys_Error("Out of memory allocating texture shapes");
				shapes = grown;
				numshapes++;
				shapes[j].depth = 1;
				shapes[j].width = loadtemp.miptexinfo[i].width;
				shapes[j].height = loadtemp.miptexinfo[i].height;
#ifdef USEVK
				shapes[j].type = loadtemp.miptexinfo[i].type;
#else
				shapes[j].type = SURF_WALL;	//too lazy to do permutations with gl
#endif
				shapes[j].mips = 1;
				shapes[j].numidx = 0;

				//mip count = number of halvings until both dimensions reach 0.
				w = shapes[j].width;
				h = shapes[j].height;
				for (shapes[j].mips = 0; w || h; shapes[j].mips++)
				{
					w>>=1;
					h>>=1;
				}

				textures[i].shape = j;
				textures[i].subshape = 0;
			}
		}

#ifndef USEBINDLESS
		dib_shape = new struct shape_s*[numshapes];	//for rendering to cache stuff a bit more.
#endif

#ifdef USEGL
		CheckGLError();

		//allocate storage for each shape's array texture. the pixels are uploaded further down.
		for (i = 0; i < numshapes; i++)
		{
			int mips = shapes[i].mips;
			int layers = shapes[i].depth*2; //diffuse, fullbright
			int width = shapes[i].width;
			int height = shapes[i].height;

			pglGenTextures(1, &shapes[i].diffuse);
			pglBindTexture(GL_TEXTURE_2D_ARRAY, shapes[i].diffuse);
			pglTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_S, GL_REPEAT);
			pglTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_WRAP_T, GL_REPEAT);
			pglTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
			pglTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
			if (pglTexStorage3D)
				pglTexStorage3D(GL_TEXTURE_2D_ARRAY, mips, GL_RGBA8, width, height, layers);
/*			else
			{
				pglTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAX_LEVEL, mips-1);
				for (int j = 0; j < mips; j++, width/=2, height/=2)
					pglTexImage3D(GL_TEXTURE_2D_ARRAY, j, GL_RGBA8, width, height, layers, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
			}*/
		}

		CheckGLError();
#endif
#ifdef USEVK
		for (i = 0; i < numshapes; i++)
		{
			int mips = shapes[i].mips;
			int layers = shapes[i].depth*2; //diffuse, fullbright
			int width = shapes[i].width;
			int height = shapes[i].height;

			//fixme: should allocate a single block of memory and then suballocate, but whatever. this is quake content.
			shapes[i].image = CreateTexture2DArray(width, height, layers, mips);
			CreateTextureSampler(&shapes[i].image, true, true);
		}
#endif

		//figure out animation chains
		//starting from each '+0' frame, collect frames 1..9 until one is missing, then link them into a ring.
		for (i = 0; i < numtextures; i++)
		{
			if (textures[i].name[0] == '+' && textures[i].name[1] == '0')
			{
				int list[10];
				int cnt;
				list[0] = i;
				for (cnt = 1; cnt < 10; cnt++)
				{
					list[cnt] = FindAnimTexture(cnt, &textures[i].name[2], loadtemp.miptexinfo[i].width, loadtemp.miptexinfo[i].height);
					if(list[cnt] == -1)
						break;
				}
				for (int j = 0; j < cnt; j++)
				{
					textures[list[j]].chain_next = list[(j+1)%cnt];
					textures[list[j]].chain_total = cnt;
				}
			}
		}

		//pass 2: convert and upload the pixels. only mip level 0 is written by hand (the 'l < 1' loops).
		for (i = 0; i < numtextures; i++)
		{
			int shape = textures[i].shape;
			int diffuse = textures[i].subshape*2;
			int fullbright = textures[i].subshape*2+1;
			if (src->offset[i] == -1)
			{
				in = &dummymip;
			}
			else
			{
				in = (miptex_t *)((unsigned char *)src + src->offset[i]);
				assert((char*)in > (char*)src->offset && (char*)(in+1) < srcend);
			}

			if (!in->offset[0])
			{	//mips with no level 0 are external. we should probably try to load them from disk, but we don't support that.
				//instead: a red/green checkerboard for the diffuse layer, and an all-zero fullbright layer.
				int l;
				unsigned int width = in->width;
				if (loadtemp.miptexinfo[i].type == SURF_SKY)
					width >>= 1;

				for (l = 0; l < 1; l++)
				{
					unsigned char *out = new unsigned char[4*(width>>l)*(in->height>>l)];
					size_t j = 0;
					for (size_t y = 0; y < (in->height>>l); y++)
					{
						for (size_t x = 0; x < (width>>l); x++)
						{
							out[j+0] = (unsigned char)((x^y)&1) * 255;
							out[j+1] = (unsigned char)!((x^y)&1) * 255;
							out[j+2] = 0;
							out[j+3] = 255;
							j+=4;
						}
					}
#ifdef USEGL
					pglBindTexture(GL_TEXTURE_2D_ARRAY, shapes[shape].diffuse);
					pglTexSubImage3D (GL_TEXTURE_2D_ARRAY, l, 0, 0, diffuse, width, in->height, 1, GL_RGBA, GL_UNSIGNED_BYTE, out);
					CheckGLError();
#endif
#ifdef USEVK
					FillTextureSync(shapes[shape].image, width, in->height, diffuse, ~0, out);
#endif
					delete[] out;
				}
				for (l = 0; l < 1; l++)
				{
					unsigned char *out = new unsigned char[4*(width>>l)*(in->height>>l)];
					for (size_t j = (width>>l)*(in->height>>l); j-- > 0 ;)
					{
						out[j*4+0] = 0;
						out[j*4+1] = 0;
						out[j*4+2] = 0;
						out[j*4+3] = 0;
					}
#ifdef USEGL
					pglBindTexture(GL_TEXTURE_2D_ARRAY, shapes[shape].diffuse);
					pglTexSubImage3D (GL_TEXTURE_2D_ARRAY, l, 0, 0, fullbright, width, in->height, 1, GL_RGBA, GL_UNSIGNED_BYTE, out);
					CheckGLError();
#endif
#ifdef USEVK
					FillTextureSync(shapes[shape].image, width, in->height, fullbright, ~0, out);
#endif
					delete[] out;
				}
			}
			else if (loadtemp.miptexinfo[i].type == SURF_SKY)
			{
				//each source row is two half-width images side by side.
				//the first half goes to the 'fullbright' layer with palette index 0 treated as transparent, the second half goes to the 'diffuse' layer.
				unsigned int width = in->width>>1;
				//double-width (we already halved that)
				unsigned char *outbase = new unsigned char[4*width*in->height];
				unsigned char *outfb = new unsigned char[4*width*in->height];
				unsigned char *data = (unsigned char*)in + in->offset[0];
				assert((char*)data > (char*)src && (char*)data + in->width*in->height < srcend);
				for (size_t y = 0; y < in->height; y++)
				{
					for (size_t x = 0; x < width; x++, data++)
					{
						if (*data == 0)
						{	//premultiply it, with 0 being invisible.
							*outfb++ = 0;
							*outfb++ = 0;
							*outfb++ = 0;
							*outfb++ = 0;
						}
						else
						{
							*outfb++ = quakepalette[*data*3+0];
							*outfb++ = quakepalette[*data*3+1];
							*outfb++ = quakepalette[*data*3+2];
							*outfb++ = 255;
						}
					}
					for (size_t x = 0; x < width; x++, data++)
					{
						*outbase++ = quakepalette[*data*3+0];
						*outbase++ = quakepalette[*data*3+1];
						*outbase++ = quakepalette[*data*3+2];
						*outbase++ = 255;
					}
				}

				//rewind the write cursors back to the start of each buffer.
				outbase -= 4*width*in->height;
				outfb -= 4*width*in->height;

#ifdef USEGL
				pglBindTexture(GL_TEXTURE_2D_ARRAY, shapes[shape].diffuse);
				pglTexSubImage3D (GL_TEXTURE_2D_ARRAY, 0, 0, 0, diffuse, width, in->height, 1, GL_RGBA, GL_UNSIGNED_BYTE, outbase);
				pglTexSubImage3D (GL_TEXTURE_2D_ARRAY, 0, 0, 0, fullbright, width, in->height, 1, GL_RGBA, GL_UNSIGNED_BYTE, outfb);
				CheckGLError();
#endif
#ifdef USEVK
				FillTextureSync(shapes[shape].image, width, in->height, diffuse, ~0, outbase);
				FillTextureSync(shapes[shape].image, width, in->height, fullbright, ~0, outfb);
#endif

				delete[] outbase;
				delete[] outfb;
			}
			else
			{
				int l;

				//diffuse layer: straight palette lookup, fully opaque.
				for (l = 0; l < 1; l++)
				{
					unsigned char *out = new unsigned char[4*(in->width>>l)*(in->height>>l)];
					if (in->offset[l])
					{
						unsigned char *data = (unsigned char*)in + in->offset[l];
						assert((char*)data > (char*)src && (char*)data + in->width*in->height < srcend);
						for (size_t j = (in->width>>l)*(in->height>>l); j-- > 0 ;)
						{
							out[j*4+0] = quakepalette[data[j]*3+0];
							out[j*4+1] = quakepalette[data[j]*3+1];
							out[j*4+2] = quakepalette[data[j]*3+2];
							out[j*4+3] = 255;
						}
					}
					else
					{
						for (size_t j = (in->width>>l)*(in->height>>l); j-- > 0 ;)
						{
							out[j*4+0] = quakepalette[0*3+0];
							out[j*4+1] = quakepalette[0*3+1];
							out[j*4+2] = quakepalette[0*3+2];
							out[j*4+3] = 255;
						}
					}
#ifdef USEGL
					pglBindTexture(GL_TEXTURE_2D_ARRAY, shapes[shape].diffuse);
					pglTexSubImage3D (GL_TEXTURE_2D_ARRAY, l, 0, 0, diffuse, in->width, in->height, 1, GL_RGBA, GL_UNSIGNED_BYTE, out);
					CheckGLError();
#endif
#ifdef USEVK
					FillTextureSync(shapes[shape].image, in->width, in->height, diffuse, ~0, out);
#endif
					delete[] out;
				}
				//fullbright layer: only the last 32 palette entries are kept, everything else is transparent black.
				for (l = 0; l < 1; l++)
				{
					unsigned char *out = new unsigned char[4*(in->width>>l)*(in->height>>l)];
					if (in->offset[l])
					{
//FIXME: assert in->offsets[l]
						unsigned char *data = (unsigned char*)in + in->offset[l];
						for (size_t j = (in->width>>l)*(in->height>>l); j-- > 0 ;)
						{
							if (data[j] >= 256-32)
							{
								out[j*4+0] = quakepalette[data[j]*3+0];
								out[j*4+1] = quakepalette[data[j]*3+1];
								out[j*4+2] = quakepalette[data[j]*3+2];
								out[j*4+3] = 255;
							}
							else
							{
								out[j*4+0] = 0;
								out[j*4+1] = 0;
								out[j*4+2] = 0;
								out[j*4+3] = 0;
							}
						}
					}
					else
					{
						for (size_t j = (in->width>>l)*(in->height>>l); j-- > 0 ;)
						{
							out[j*4+0] = 0;
							out[j*4+1] = 0;
							out[j*4+2] = 0;
							out[j*4+3] = 0;
						}
					}
#ifdef USEGL
					pglBindTexture(GL_TEXTURE_2D_ARRAY, shapes[shape].diffuse);
					pglTexSubImage3D (GL_TEXTURE_2D_ARRAY, l, 0, 0, fullbright, in->width, in->height, 1, GL_RGBA, GL_UNSIGNED_BYTE, out);
					CheckGLError();
#endif
#ifdef USEVK
					FillTextureSync(shapes[shape].image, in->width, in->height, fullbright, ~0, out);
#endif
					delete[] out;
				}
			}
		}

#ifdef USEGL
		//let gl build the remaining mip levels from level 0.
		for (i = 0; i < numshapes; i++)
		{
			pglBindTexture(GL_TEXTURE_2D_ARRAY, shapes[i].diffuse);
			pglGenerateMipmap(GL_TEXTURE_2D_ARRAY);

#ifdef USEBINDLESS
			shapes[i].diffuse_handle = pglGetTextureHandleARB(shapes[i].diffuse);
			pglMakeTextureHandleResidentARB(shapes[i].diffuse_handle);
#endif
			CheckGLError();
		}
#endif

		//the fallback palette above comes from new[], so it needs the array form of delete.
		//NOTE(review): this assumes FS_ReadFile also hands back new[]'d memory - confirm against filesystem.h.
		delete[] quakepalette;
	}

	//Converts the on-disk texinfo lump into the in-memory texinfo array.
	//in: start of the lump. lumpsize: its size in bytes (must be a whole number of entries).
	//Must run after LoadTextures, as it reads numtextures and loadtemp.miptexinfo.
	void LoadTexinfo(bsp_texinfo_t *in, unsigned int lumpsize)
	{
		unsigned int i;
		mtexinfo_t *out;
		if (lumpsize % sizeof(*in))
			Sys_Error("Funny size of texinfo lump");

		numtexinfo = lumpsize / sizeof(*in);
		out = texinfo = new mtexinfo_t[numtexinfo];	//used temporarily

		for (i = 0; i < numtexinfo; i++, in++, out++)
		{
			//the index comes straight from the file and is used to index loadtemp.miptexinfo below,
			//so it has to be checked in release builds too (an assert alone compiles away).
			if (in->texture_id >= numtextures)
				Sys_Error("Texinfo references an invalid texture");
			out->texturenum = in->texture_id;
			out->flags = in->flags & 1;	//only the lowest flag bit is kept from the file
			if (loadtemp.miptexinfo[out->texturenum].type == SURF_SKY)
				out->flags |= 3;
			//copy the two texture-space planes (s and t).
			for (unsigned int j = 0; j < 2; j++)
			{
				out->tplane[j].norm[0] = in->tplanes[j].normal[0];
				out->tplane[j].norm[1] = in->tplanes[j].normal[1];
				out->tplane[j].norm[2] = in->tplanes[j].normal[2];
				out->tplane[j].dist = in->tplanes[j].dist;
			}
		}
	}

#if 1
	//Reserves a block of lightmap space, starting a new lightmap page when the current one has no room.
	//s: not used.
	//extents: requested size. columns [x, x+extents[0]) over rows y..y+extents[1] (inclusive) are claimed.
	//lmmin: receives the x (lmmin[0]) and y (lmmin[1]) of the reserved block.
	//returns the index of the page the block was placed on (always the newest page).
	//lmwidths[] tracks, per row, how far along that row of the newest page is already in use.
	unsigned int FillLightmap(msurf_t *s, unsigned int extents[2], unsigned int lmmin[2])
	{
		unsigned int f, y, l, j;
		unsigned int u;
		//this is fairly inefficient actually...

		//a block that cannot fit even on an empty page would otherwise make us add pages forever.
		//given the loop bounds below, an empty page accepts a block only if extents[0] < LMSIZE and 2*extents[1] < LMSIZE.
		if (extents[0] >= LMSIZE || extents[1] >= (LMSIZE+1)/2)
			Sys_Error("Surface lightmap is too large");

		f = -1;	//first row of the current run of usable rows, or -1 when there is no run
		if (!numlightmaps)
		{	//first time
			lms = new lm_t*[1];
			numlightmaps++;
			lms[0] = new lm_t;
			memset(lms[0]->lmblock, 0, sizeof(lms[0]->lmblock));

			memset(lmwidths, 0, sizeof(lmwidths));
		}

		for (y = 0; y+extents[1] < LMSIZE; y++)
		{
			if (lmwidths[y]+extents[0] < LMSIZE)
			{
				if (f == -1)
					f = y;
				if (y-f == extents[1])
				{	//it fits
					//the block starts at the furthest-used column of any row it covers.
					l = 0;
					for (j = f; j <= y; j++)
					{
						if (l < lmwidths[j])
							l = lmwidths[j];
					}
					for (j = f; j <= y; j++)
						lmwidths[j] = l + extents[0];

					//that's the block allocated...
					lmmin[0] = l;
					lmmin[1] = f;
					return numlightmaps-1;
				}
			}
			else
				f = -1;	//doesn't fit, perhaps it will on the next.
		}

		//doesn't fit.
		//grow the page list by one, give it a fresh zeroed page, and reset the row tracking to describe that new page.
		{
			lm_t **nl, **ol;
			ol = lms;
			nl = new lm_t*[numlightmaps+1];
			for (u = 0; u < numlightmaps; u++)
			{
				nl[u] = lms[u];
			}
			lms = nl;
			delete[] ol;
			numlightmaps++;
			lms[numlightmaps-1] = new lm_t;
			memset(lms[numlightmaps-1]->lmblock, 0, sizeof(lms[numlightmaps-1]->lmblock));

			memset(lmwidths, 0, sizeof(lmwidths));
		}
		return FillLightmap(s, extents, lmmin);	//try again (guaranteed to fit on the empty page thanks to the size check above)
	}
#endif
	//Takes a private copy of the lighting lump, keeping it in lightmapsrc with its size in lightmapsize.
	void LoadLighting(unsigned char *in, unsigned int lumpsize)
	{
		unsigned char *copy = new unsigned char[lumpsize];
		memcpy(copy, in, lumpsize);

		lightmapsize = lumpsize;
		lightmapsrc = copy;
	}

#ifdef USEGL
	//Creates the vertex array object plus its vertex and element buffers, uploads the vertex data, then frees the cpu-side vertex copy.
	//The element buffer is only sized here (numidx indexes, no initial data).
	void LoadMainVAO(void)
	{
		CheckGLError();
		
		//generate our vao
		pglGenVertexArrays(1, &vao);
		pglGenBuffers(1, &vbo);
		pglGenBuffers(1, &ebo);
		pglBindVertexArray(vao);
		pglBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
		pglBindBuffer(GL_ARRAY_BUFFER, vbo);
		//index storage is allocated empty and flagged dynamic; the vertex data is uploaded once as static.
		pglBufferData(GL_ELEMENT_ARRAY_BUFFER, numidx*sizeof(*ridx), NULL, GL_DYNAMIC_DRAW);
		pglBufferData(GL_ARRAY_BUFFER, numverts*sizeof(*vbodata), vbodata, GL_STATIC_DRAW);

		UpdateVAO();

		pglBindVertexArray(0);

		//vbo not ebo - idx info is needed for vising
		//(so only the cpu copy of the vertex data is released here.)
		delete[] vbodata;
		vbodata = NULL;
	}
	void LoadShader(void)
	{
		CheckGLError();
		shader = GL_CompileShader(vertexprogram, fragmentprogram);
		CheckGLError();

		GLuint ub = pglGetUniformBlockIndex(shader,  "MatrixBlock");
		if (ub != -1)
			pglUniformBlockBinding(shader, ub, 0);

		pglUseProgram(shader);

		ub = pglGetUniformLocation(shader, "lightmaps");
		if (ub != -1)
			pglUniform1i(ub, 1);
		ub = pglGetUniformLocation(shader, "tex");
		if (ub != -1)
			pglUniform1i(ub, 0);

		CheckGLError();
	}

	//Creates the lightmap array texture (one LMSIZE*LMSIZE rgba layer per lightmap page), uploads every page, and leaves it bound on texture unit 1.
	void LoadLightmapArray(void)
	{
		CheckGLError();

		pglGenTextures(1, &lightmaptexture);
		pglBindTexture(GL_TEXTURE_2D_ARRAY, lightmaptexture);
		CheckGLError();
		//a single mip level, numlightmaps layers.
		pglTexStorage3D(GL_TEXTURE_2D_ARRAY, 1, GL_RGBA8, LMSIZE, LMSIZE, numlightmaps);
		CheckGLError();
		pglTexParameteri(GL_TEXTURE_2D_ARRAY,GL_TEXTURE_MIN_FILTER,GL_LINEAR);
		pglTexParameteri(GL_TEXTURE_2D_ARRAY,GL_TEXTURE_MAG_FILTER,GL_LINEAR);
		pglTexParameteri(GL_TEXTURE_2D_ARRAY,GL_TEXTURE_WRAP_S,GL_CLAMP_TO_EDGE);
		pglTexParameteri(GL_TEXTURE_2D_ARRAY,GL_TEXTURE_WRAP_T,GL_CLAMP_TO_EDGE);

		CheckGLError();

		//one layer per page.
		for (unsigned int i = 0; i < numlightmaps; i++)
		{
			pglTexSubImage3D (GL_TEXTURE_2D_ARRAY, 0, 0, 0, i, LMSIZE, LMSIZE, 1, GL_RGBA, GL_UNSIGNED_BYTE, lms[i]->lmblock);
		}

		//bind it to unit 1, then restore unit 0 as the active unit.
		pglActiveTexture(GL_TEXTURE1);
		pglBindTexture(GL_TEXTURE_2D_ARRAY, lightmaptexture);
		pglActiveTexture(GL_TEXTURE0);

		CheckGLError();
	}
#endif
#ifdef USEVK
	//vulkan state owned by this model.
	VkPipeline pipeline[3];	//wall, water, sky. indexed by a shape's type when drawing.
	VkPipelineLayout pipeLayout;	//shared by all of the pipelines above.
	VkDescriptorSetLayout descSetLayout[2];	//general, pershape
	VkDescriptorPool descPools[2];	//one pool per layout above: [0] is created in SetupUBO, [1] in LoadLightmapArray.

	vk_buffer_t ubo;	//holds one struct scenestate_s, rewritten on each DrawEntitySubmodel call.
	vk_image_t lightmaps;	//array image with one LMSIZE*LMSIZE layer per lightmap page.
	VkDescriptorSet descriptors;	//the 'general' set: binding 0 is the ubo, binding 1 is the lightmap array.

	uint32_t memory_type_from_properties(uint32_t typeBits, VkFlags requirements_mask)
	{
		// Search memtypes to find first index with those properties
		for (uint32_t i = 0; i < 32; i++)
		{
			if ((typeBits & 1) == 1)
			{
				// Type is available, does it match user properties?
				if ((vk_memory_properties.memoryTypes[i].propertyFlags & requirements_mask) == requirements_mask)
					return i;
			}
			typeBits >>= 1;
		}

		VkAssert(VK_ERROR_MEMORY_MAP_FAILED);
		return 0;
	}

	//Creates the 'general' descriptor pool (descPools[0]) and allocates one set from it using descSetLayout[0],
	//creates the uniform buffer that holds struct scenestate_s, and writes that buffer into binding 0 of the set.
	//Returns the new descriptor set. The combined-image-sampler slot reserved in the pool is filled in later by LoadLightmapArray.
	VkDescriptorSet SetupUBO(void)
	{
		VkDescriptorPoolSize dpisz[2] = {};
		dpisz[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
		dpisz[0].descriptorCount = 1;
		dpisz[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
		dpisz[1].descriptorCount = 1;
		VkDescriptorPoolCreateInfo dpi = {};	// the creation info states how many descriptor sets are in this pool
		dpi.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
		dpi.flags = 0;
		dpi.maxSets = 1;
		dpi.poolSizeCount = countof(dpisz);
		dpi.pPoolSizes = dpisz;

		//the destructor destroys this pool with vkallocationcb, and vulkan requires creation and destruction to agree on the allocator.
		VkAssert(pvkCreateDescriptorPool(vkdevice, &dpi, vkallocationcb, &descPools[0]));

		VkDescriptorSetAllocateInfo descAllocInfo = {};	// from pool descPool, with layout descSetLayout
		descAllocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
		descAllocInfo.descriptorPool = descPools[0];
		descAllocInfo.descriptorSetCount = 1;
		descAllocInfo.pSetLayouts = &descSetLayout[0];

		//named so as not to shadow the 'descriptors' member.
		VkDescriptorSet newset;
		VkAssert(pvkAllocateDescriptorSets(vkdevice, &descAllocInfo, &newset));

		ubo = CreateBuffer(sizeof(struct scenestate_s), VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, true);

		VkDescriptorBufferInfo bufinfo = {};
		bufinfo.buffer = ubo.buffer;
		bufinfo.offset = 0;
		bufinfo.range = sizeof(struct scenestate_s);
		VkWriteDescriptorSet descriptorWrite = {};	// write the details of our UBO buffer into binding 0
		descriptorWrite.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
		descriptorWrite.dstSet = newset;
		descriptorWrite.dstBinding = 0;
		descriptorWrite.dstArrayElement = 0;
		descriptorWrite.descriptorCount = 1;
		descriptorWrite.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
		descriptorWrite.pImageInfo = NULL;
		descriptorWrite.pBufferInfo = &bufinfo;
		descriptorWrite.pTexelBufferView = NULL;

		pvkUpdateDescriptorSets(vkdevice, 1, &descriptorWrite, 0, NULL);

		return newset;
	}

	void LoadLightmapArray(void)
	{
#if 1
		lightmaps = CreateTexture2DArray(LMSIZE, LMSIZE, numlightmaps, 1);
		CreateTextureSampler(&lightmaps, true, true);
		for (unsigned int i = 0; i < numlightmaps; i++)
		{
			FillTextureSync(lightmaps, LMSIZE, LMSIZE, i, 0, lms[i]->lmblock);
		}


		VkDescriptorImageInfo imginfo = {};
		imginfo.sampler = lightmaps.sampler;
		imginfo.imageView = lightmaps.imageView;
		imginfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;

		VkWriteDescriptorSet descriptorWrite = {};	// write the details of our UBO buffer into binding 0
		descriptorWrite.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
		descriptorWrite.dstSet = descriptors;
		descriptorWrite.dstBinding = 1;
		descriptorWrite.dstArrayElement = 0;
		descriptorWrite.descriptorCount = 1;
		descriptorWrite.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
		descriptorWrite.pImageInfo = &imginfo;
		descriptorWrite.pBufferInfo = NULL;
		descriptorWrite.pTexelBufferView = NULL;
		pvkUpdateDescriptorSets(vkdevice, 1, &descriptorWrite, 0, NULL);
#endif

		//sort out shape textures
		VkDescriptorPoolSize dpisz[1] = {};
		dpisz[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
		dpisz[0].descriptorCount = 1;
		VkDescriptorPoolCreateInfo dpi = {};	// the creation info states how many descriptor sets are in this pool
		dpi.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
		dpi.flags = 0;
		dpi.maxSets = numshapes;
		dpi.poolSizeCount = countof(dpisz);
		dpi.pPoolSizes = dpisz;

		VkAssert(pvkCreateDescriptorPool(vkdevice, &dpi, NULL, &descPools[1]));
		for (unsigned int i = 0; i < numshapes; i++)
		{
			VkDescriptorSetAllocateInfo descAllocInfo = {};	// from pool descPool, with layout descSetLayout
			descAllocInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
			descAllocInfo.descriptorPool = descPools[1];
			descAllocInfo.descriptorSetCount = 1;
			descAllocInfo.pSetLayouts = &descSetLayout[1];

			VkAssert(pvkAllocateDescriptorSets(vkdevice, &descAllocInfo, &shapes[i].descriptors));

	


			VkDescriptorImageInfo imginfo = {};
			imginfo.sampler = shapes[i].image.sampler;
			imginfo.imageView = shapes[i].image.imageView;
			imginfo.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;

			VkWriteDescriptorSet descriptorWrite = {};	// write the details of our UBO buffer into binding 0
			descriptorWrite.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
			descriptorWrite.dstSet = shapes[i].descriptors;
			descriptorWrite.dstBinding = 0;
			descriptorWrite.dstArrayElement = 0;
			descriptorWrite.descriptorCount = 1;
			descriptorWrite.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
			descriptorWrite.pImageInfo = &imginfo;
			descriptorWrite.pTexelBufferView = NULL;

			pvkUpdateDescriptorSets(vkdevice, 1, &descriptorWrite, 0, NULL);
		}
	}
	//Draws the world model (vulkan path).
	//When the view leaf has changed since the last call, the index buffer is rebuilt from that leaf's pvs, grouped per shape.
	//Then one indexed draw is recorded per non-empty shape, switching pipeline whenever the shape type changes.
	//ent: supplies the matrix pushed as the vertex-stage push constant.
	//scene: only forwarded to BruteFaces in the whole-world mode.
	inline void WalkNodes(rentity_t *ent, rstate_t *scene)
	{
		int rendermode = vid->mode;
		matrix4x4 identity;
		identity.MakeIdentity();	//note: only referenced by the commented-out push-constant alternatives below.

		if (rendermode == MODE_WHOLEWORLD_MULTI)
		{	//no pvs at all: just draw every face of the world submodel.
			BruteFaces(submodels, ent, scene);
			return;
		}

		//set 0 is the general (ubo+lightmap) set. set 1 starts as shape 0's textures and is rebound per shape while drawing.
		VkDescriptorSet sets[2] = {};
		sets[0] = descriptors;
		sets[1] = shapes[0].descriptors;

		if (0)
		{	//whole world in a single indexed draw. buggy because of the lack of texture support.
			//(disabled: this branch is never taken.)
			uint32_t permu = ~0;
			pvkCmdBindDescriptorSets(vkmaincmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeLayout, /*first*/0, countof(sets), sets, 0, NULL);
			pvkCmdPushConstants(vkmaincmd, pipeLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(ent->matrix.data), ent->matrix.data);//sizeof(identity.data), identity.data);
			pvkCmdBindIndexBuffer(vkmaincmd, ebo.buffer, 0, VK_INDEX_TYPE_UINT32);
			const VkDeviceSize offsets[1] = {0};
			pvkCmdBindVertexBuffers(vkmaincmd, 0, 1, &vbo.buffer, offsets);
			pvkCmdBindPipeline(vkmaincmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline[0]);
			pvkCmdBindDescriptorSets(vkmaincmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeLayout, /*first*/1, /*count*/1, &shapes[0].descriptors, 0, NULL);
			FillBufferSync(&ebo, 0, numidx*sizeof(*ridx), idxdata);
			pvkCmdDrawIndexed(vkmaincmd, numidx, 1, 0, 0, 0);
		}
		else if (1)
		{
			struct rleaf_s *viewleaf;
			if (numsubmodels == 1)
				viewleaf = leaves;	//put it in the solid leaf, so it opens up pvs and doesn't result in each ammobox rewriting index lists.
			else
				viewleaf = GetLeafAtOrg(submodels[0].rootnode, relativeviewofs);
			if (lastviewleaf != viewleaf)
			{	//the visible set is cached, and only recomputed when the view moves into a different leaf.
				//FIXME: really we should be running this on a different thread, and caching the commandbuffer
				unsigned char decompressedvis[(MAX_MAP_LEAFS+7)/8];
				lastviewleaf = viewleaf;
				DecompressVis(viewleaf->vis, decompressedvis);
				visframe++;


				unsigned int *ridxstart;
				ridxstart = new unsigned int[numidx];	//cpu-side staging copy of the whole index buffer
				//update the element ebo according to vis (note that this would require rebuilding the ebo if pvs+frustum checks were disabled.
				if (ridxstart)
				{	//mapbuffer is technically allowed to fail.
					//one write cursor per shape, each starting at that shape's region of the index buffer.
					//the array lives on this function's stack, so ridx_ati must not be used once we return.
					ridx_ati = (unsigned int**)_alloca(sizeof(unsigned int*) * numshapes);

					for (size_t i = 0; i < numshapes; i++)
						ridx_ati[i] = ridxstart + shapes[i].firstidx;
					markframe++;
					//presumably appends the indexes of visible surfaces through the ridx_ati cursors - confirm against WalkLeafsMulti.
					WalkLeafsMulti(decompressedvis);

					FillBufferSync(&ebo, 0, numidx*sizeof(*ridx), ridxstart);

					//how far each cursor moved is the number of indexes to draw for that shape.
					for (size_t i = 0; i < numshapes; i++)
					{
						size_t count = (ridx_ati[i] - (ridxstart + shapes[i].firstidx));
						size_t firstidx = ridx_ati[i]-count - ridxstart;
						shapes[i].batchidxcount = (uint32_t)count;
						shapes[i].batchidxfirst = (uint32_t)firstidx;
					}
					delete[] ridxstart;
				}
			}

			//FIXME: we can cache much of this in some intermediate command buffer.
			//vkCmdExecuteCommands(vkmaincmd, 1, &worldcmd);

			uint32_t permu = ~0;	//currently bound pipeline permutation. ~0 = none bound yet.
			pvkCmdBindDescriptorSets(vkmaincmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeLayout, /*first*/0, countof(sets), sets, 0, NULL);
			pvkCmdPushConstants(vkmaincmd, pipeLayout, VK_SHADER_STAGE_VERTEX_BIT, 0, sizeof(ent->matrix.data), ent->matrix.data);//sizeof(identity.data), identity.data);
			pvkCmdBindIndexBuffer(vkmaincmd, ebo.buffer, 0, VK_INDEX_TYPE_UINT32);
			const VkDeviceSize offsets[1] = {0};
			pvkCmdBindVertexBuffers(vkmaincmd, 0, 1, &vbo.buffer, offsets);

			//one draw per shape that has anything visible.
			for (size_t i = 0; i < numshapes; i++)
			{
				uint32_t count = shapes[i].batchidxcount;
				uint32_t firstidx = shapes[i].batchidxfirst;
				if (!count)
					continue;
				if (permu != shapes[i].type)
				{	//only rebind the pipeline when the surface type actually changes.
					permu = shapes[i].type;
					pvkCmdBindPipeline(vkmaincmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline[permu]);
				}
				pvkCmdBindDescriptorSets(vkmaincmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeLayout, 1, 1, &shapes[i].descriptors, 0, NULL);
				pvkCmdDrawIndexed(vkmaincmd, count, 1, firstidx, 0, 0);
			}
		}
	}
	void LoadShader(void)
	{
		//per-model
		{
			VkDescriptorSetLayoutBinding db[2] = {};
			db[0].binding = 0;
			db[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
			db[0].descriptorCount = 1;
			db[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT|VK_SHADER_STAGE_FRAGMENT_BIT;
			db[0].pImmutableSamplers = NULL;

			db[1].binding = 1;
			db[1].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
			db[1].descriptorCount = 1;
			db[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
			db[1].pImmutableSamplers = NULL;

			VkDescriptorSetLayoutCreateInfo descSetLayoutCreateInfo = {};
			descSetLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
			descSetLayoutCreateInfo.bindingCount = countof(db);
			descSetLayoutCreateInfo.pBindings = db;
			VkAssert(pvkCreateDescriptorSetLayout(vkdevice, &descSetLayoutCreateInfo, NULL, &descSetLayout[0]));
		}

		//per-shape
		{
			VkDescriptorSetLayoutBinding db[1] = {};
			db[0].binding = 0;
			db[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
			db[0].descriptorCount = 1;
			db[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
			db[0].pImmutableSamplers = NULL;

			VkDescriptorSetLayoutCreateInfo descSetLayoutCreateInfo = {};
			descSetLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
			descSetLayoutCreateInfo.bindingCount = countof(db);
			descSetLayoutCreateInfo.pBindings = db;
			VkAssert(pvkCreateDescriptorSetLayout(vkdevice, &descSetLayoutCreateInfo, NULL, &descSetLayout[1]));
		}

		VkPushConstantRange pushconstants[1] = {};
		pushconstants[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
		pushconstants[0].offset = 0;
		pushconstants[0].size = sizeof(matrix4x4);

		VkPipelineLayoutCreateInfo pipeLayoutCreateInfo = {};
		pipeLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
		pipeLayoutCreateInfo.flags = 0;
		pipeLayoutCreateInfo.setLayoutCount = countof(descSetLayout);
		pipeLayoutCreateInfo.pSetLayouts = descSetLayout;
		pipeLayoutCreateInfo.pushConstantRangeCount = countof(pushconstants);
		pipeLayoutCreateInfo.pPushConstantRanges = pushconstants;
		VkAssert(pvkCreatePipelineLayout(vkdevice, &pipeLayoutCreateInfo, NULL, &pipeLayout));

		VkDynamicState dynamicStateEnables[VK_DYNAMIC_STATE_RANGE_SIZE]={};
		VkPipelineDynamicStateCreateInfo dyn = {};
		dyn.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
		dyn.flags = 0;
		dyn.dynamicStateCount = 0;
		dyn.pDynamicStates = dynamicStateEnables;

		VkVertexInputBindingDescription vbinds[1] = {};
		vbinds[0].binding = 0;
		vbinds[0].stride = sizeof(*vbodata);
		vbinds[0].inputRate = VK_VERTEX_INPUT_RATE_VERTEX;
		VkVertexInputAttributeDescription vattrs[4] = {};
		vattrs[0].binding = vbinds[0].binding;
		vattrs[0].location = 0;
		vattrs[0].format = VK_FORMAT_R32G32B32_SFLOAT;
		vattrs[0].offset = (char*)vbodata->org - (char*)vbodata;
		vattrs[1].binding = vbinds[0].binding;
		vattrs[1].location = 1;
		vattrs[1].format = VK_FORMAT_R32G32B32_SINT;
		vattrs[1].offset = (char*)vbodata->lmidx - (char*)vbodata;
		vattrs[2].binding = vbinds[0].binding;
		vattrs[2].location = 2;
		vattrs[2].format = VK_FORMAT_R32G32B32A32_SFLOAT;
		vattrs[2].offset = (char*)vbodata->st - (char*)vbodata;
		vattrs[3].binding = vbinds[0].binding;
		vattrs[3].location = 3;
		vattrs[3].format = VK_FORMAT_R8G8B8A8_UINT;
		vattrs[3].offset = (char*)vbodata->lightstyle - (char*)vbodata;

		VkPipelineVertexInputStateCreateInfo vi = {};
		vi.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
		vi.vertexBindingDescriptionCount = countof(vbinds);
		vi.pVertexBindingDescriptions = vbinds;
		vi.vertexAttributeDescriptionCount = countof(vattrs);
		vi.pVertexAttributeDescriptions = vattrs;

		VkPipelineInputAssemblyStateCreateInfo ia = {};
		ia.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
		ia.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
		VkPipelineViewportStateCreateInfo vp = {};
		vp.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
		vp.viewportCount = 1;
		dynamicStateEnables[dyn.dynamicStateCount++] =	VK_DYNAMIC_STATE_VIEWPORT;
		vp.scissorCount = 1;
		dynamicStateEnables[dyn.dynamicStateCount++] =	VK_DYNAMIC_STATE_SCISSOR;
		VkPipelineRasterizationStateCreateInfo rs = {};
		rs.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
		rs.polygonMode = VK_POLYGON_MODE_FILL;
		rs.cullMode = VK_CULL_MODE_BACK_BIT;
		rs.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
		rs.depthClampEnable = VK_FALSE;
		rs.rasterizerDiscardEnable = VK_FALSE;
		rs.depthBiasEnable = VK_FALSE;
		VkPipelineMultisampleStateCreateInfo ms = {};
		ms.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
		ms.pSampleMask = NULL;
		ms.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
		VkPipelineDepthStencilStateCreateInfo ds = {};
		ds.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
		ds.depthTestEnable = VK_TRUE;
		ds.depthWriteEnable = VK_TRUE;
		ds.depthCompareOp = VK_COMPARE_OP_LESS_OR_EQUAL;
		ds.depthBoundsTestEnable = VK_FALSE;
		ds.back.failOp = VK_STENCIL_OP_KEEP;
		ds.back.passOp = VK_STENCIL_OP_KEEP;
		ds.back.compareOp = VK_COMPARE_OP_ALWAYS;
		ds.stencilTestEnable = VK_FALSE;
		ds.front = ds.back;
		VkPipelineColorBlendStateCreateInfo cb = {};
		cb.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
		VkPipelineColorBlendAttachmentState att_state[1];
		memset(att_state, 0, sizeof(att_state));
		att_state[0].colorWriteMask = 0xf;
		att_state[0].blendEnable = VK_FALSE;
		cb.attachmentCount = 1;
		cb.pAttachments = att_state;

		struct specdata_s
		{
			int mode;
		} specdata;

		VkSpecializationMapEntry specentries[1] = {};
		specentries[0].constantID = 0;
		specentries[0].offset = offsetof(struct specdata_s, mode);
		specentries[0].size = sizeof(specdata.mode);

		VkSpecializationInfo specInfo = {};
		specInfo.mapEntryCount = countof(specentries);
		specInfo.pMapEntries = specentries;
		specInfo.dataSize = sizeof(specdata);
		specInfo.pData = &specdata;

		VkPipelineShaderStageCreateInfo shaderStages[2] = {};
		shaderStages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
		shaderStages[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
		shaderStages[0].module = LoadShaderCode("world_vert.spv", spv_world_vert);
		shaderStages[0].pName = "main";
		shaderStages[0].pSpecializationInfo = &specInfo;
		shaderStages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
		shaderStages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
		shaderStages[1].module = LoadShaderCode("world_frag.spv", spv_world_frag);
		shaderStages[1].pName = "main";
		shaderStages[1].pSpecializationInfo = &specInfo;

		for (uint32_t permu = 0; permu < 3; permu++)
		{
			specdata.mode = permu;

			VkGraphicsPipelineCreateInfo pipeCreateInfo = {};
			pipeCreateInfo.sType				= VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
			pipeCreateInfo.flags				= 0;
			pipeCreateInfo.stageCount			= countof(shaderStages);
			pipeCreateInfo.pStages				= shaderStages;
			pipeCreateInfo.pVertexInputState	= &vi;
			pipeCreateInfo.pInputAssemblyState	= &ia;
			pipeCreateInfo.pTessellationState	= NULL;	//null is okay!
			pipeCreateInfo.pViewportState		= &vp;
			pipeCreateInfo.pRasterizationState	= &rs;
			pipeCreateInfo.pMultisampleState	= &ms;
			pipeCreateInfo.pDepthStencilState	= &ds;
			pipeCreateInfo.pColorBlendState		= &cb;
			pipeCreateInfo.pDynamicState		= &dyn;
			pipeCreateInfo.layout				= pipeLayout;
			pipeCreateInfo.renderPass			= vkrenderpass;
			pipeCreateInfo.subpass				= 0;
			pipeCreateInfo.basePipelineHandle	= VK_NULL_HANDLE;
			pipeCreateInfo.basePipelineIndex	= -1;	//not sure what this is about.

			if (permu)
			{
				pipeCreateInfo.basePipelineHandle = pipeline[0];
				pipeCreateInfo.flags = VK_PIPELINE_CREATE_DERIVATIVE_BIT;
			}
			else
				pipeCreateInfo.flags = VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT;

			VkAssert(pvkCreateGraphicsPipelines(vkdevice, VK_NULL_HANDLE, 1, &pipeCreateInfo, vkallocationcb, &pipeline[permu]));
		}
		//this is meant to explicitly be safe.
		pvkDestroyShaderModule(vkdevice, shaderStages[0].module, vkallocationcb);
		pvkDestroyShaderModule(vkdevice, shaderStages[1].module, vkallocationcb);
	
		descriptors = SetupUBO();
	}
	//Creates the index and vertex buffers for the loaded geometry and uploads the vertex data.
	//The index buffer is only created here, nothing is written into it by this function.
	void LoadMainVAO(void)
	{
		const size_t idxbytes = numidx*sizeof(*ridx);
		const size_t vertbytes = numverts*sizeof(*vbodata);

		ebo = CreateBuffer(idxbytes, VK_BUFFER_USAGE_INDEX_BUFFER_BIT, false);
		vbo = CreateBuffer(vertbytes, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, false);
		FillBufferSync(&vbo, 0, vertbytes, vbodata);

		//only the vertex copy is released - the index info is still needed for vising
		delete[] vbodata;
		vbodata = NULL;
	}
#endif

	//Generates texture coordinates, lightmap coordinates and lightmap texels for a single face.
	//face:        the surface being processed; its texinfo supplies the two texture-axis planes and the flags.
	//vert:        the face's vertices. org must already be valid; st, lightstyle and lmidx are written here.
	//numverts:    number of entries in vert (vert[0] is always accessed, so this must be at least 1).
	//lightofs:    byte offset of the face's samples inside lightmapsrc. -1 zero-fills the used style layers instead.
	//lightstyles: the face's 4 lightstyle slots. a value of 255 ends the list of used slots.
	//Lightmap blocks are written as 4 interleaved bytes per texel (one per style slot), in rows of LMSIZE texels.
	void CalcFaceLighting(msurf_t *face, vbodata_s *vert, size_t numverts, unsigned int lightofs, unsigned char *lightstyles)
	{
		//project each vertex onto the s/t texture axes, tracking the face's extents on each axis.
		float min[2], max[2];
		min[0] = max[0] = vert[0].st[0] = ((vec3(vert[0].org) * face->texinfo->tplane[0].norm + face->texinfo->tplane[0].dist));
		min[1] = max[1] = vert[0].st[1] = ((vec3(vert[0].org) * face->texinfo->tplane[1].norm + face->texinfo->tplane[1].dist));
		for (size_t e = 1; e < numverts; e++)
		{
			for (int j = 0; j < 2; j++)
			{
				vert[e].st[j] = ((vec3(vert[e].org) * face->texinfo->tplane[j].norm + face->texinfo->tplane[j].dist));

				if (min[j] > vert[e].st[j])
					min[j] = vert[e].st[j];
				if (max[j] < vert[e].st[j])
					max[j] = vert[e].st[j];
			}
		}

		float lmtcmin[2];
		//zero-initialised: FillLightmap is skipped for sky/turb faces, yet lmmin is still read when st[2..3] are generated below.
		unsigned int lmmin[2] = {0, 0};
		unsigned int lmext[2];
		float lmscale = 16;
		for (int j = 0; j < 2; j++)
		{	//one lightmap sample covers lmscale (16) texture units, plus one extra sample for the far edge.
			lmext[j] = (int)((ceil(max[j]/lmscale) - floor(min[j]/lmscale)) + 1);
			lmtcmin[j] = (int)(floor(min[j]/lmscale)*lmscale);
		}

		//negative lmidx values mark unlit surface types, otherwise its the index of the lightmap block that was allocated from.
		int lmidx;
		if (face->texinfo->flags & TEX_SPECIAL)
		{
			if (face->texinfo->flags & 2)
				lmidx = -2;	//sky
			else
				lmidx = -1;	//turb
		}
		else
			lmidx = FillLightmap(face, lmext, lmmin);

		if (lmidx != -1 && lmidx != -2)
		{
			if (!lightmapsize)
			{	//no lighting data at all: style slot 0 becomes fullbright, the other three layers are cleared.
				static unsigned char nullstyles[] = {0, 255, 255, 255};
				lightstyles = nullstyles;
				for (unsigned int e = 0; e < 4; e++)
				{
					unsigned char *lmout = lms[lmidx]->lmblock;
					lmout += e + 4*(lmmin[0] + LMSIZE * lmmin[1]);
					for (unsigned int y = 0; y < lmext[1]; y++)
					{
						for (unsigned int x = 0; x < lmext[0]; x++)
							lmout[x*4] = e?0:255;
						lmout += LMSIZE*4;
					}
				}
			}
			else if (lightofs == (unsigned int)-1)
			{	//face has no samples: each used style layer is black.
				for (int e = 0; e < 4; e++)
				{
					unsigned char *lmout = lms[lmidx]->lmblock;
					lmout += e + 4*(lmmin[0] + LMSIZE * lmmin[1]);
					if (lightstyles[e] == 255)
						break;
					for (unsigned int y = 0; y < lmext[1]; y++)
					{
						for (unsigned int x = 0; x < lmext[0]; x++)
							lmout[x*4] = 0;
						lmout += LMSIZE*4;
					}
				}
			}
			else
			{	//copy lmext[0]*lmext[1] source bytes per used style, stored consecutively starting at lightofs.
				unsigned char *lmin = lightmapsrc + lightofs;
				for (int e = 0; e < 4; e++)
				{
					unsigned char *lmout = lms[lmidx]->lmblock;
					lmout += e + 4*(lmmin[0] + LMSIZE * lmmin[1]);
					if (lightstyles[e] == 255)
						break;
					//make sure this style's samples end inside the lump before reading any of them.
					lightofs += lmext[0]*lmext[1];
					if (lightofs > lightmapsize)
					{
						//pointer difference is ptrdiff_t, %i wants an int.
						Com_Printf(PRINT_WARNING, "Surface %i lighting exceeds lightmap data\n", (int)(face-surfs));
						break;
					}
					for (unsigned int y = 0; y < lmext[1]; y++)
					{
						for (unsigned int x = 0; x < lmext[0]; x++)
							lmout[x*4] = *lmin++;
						lmout += LMSIZE*4;
					}
				}
			}
		}

		for (size_t e = 0; e < numverts; e++, vert++)
		{
			//lightmap coords: offset into the face's allocation (sampling at texel centres), normalised by the block size.
			for (unsigned int j = 0; j < 2; j++)
				vert->st[2+j] = ((vert->st[j] - lmtcmin[j]) + (lmmin[j]+0.5)*lmscale)/(LMSIZE*lmscale);

			vert->lightstyle[0] = lightstyles[0];
			vert->lightstyle[1] = lightstyles[1];
			vert->lightstyle[2] = lightstyles[2];
			vert->lightstyle[3] = lightstyles[3];

			vert->lmidx[0] = lmidx;
			vert->lmidx[1] = textures[face->texinfo->texturenum].subshape*2;
			vert->lmidx[2] = textures[face->texinfo->texturenum].shape;
			//texture coords were computed in texels, normalise them by the texture's size.
			vert->st[0] /= loadtemp.miptexinfo[face->texinfo->texturenum].width;
			vert->st[1] /= loadtemp.miptexinfo[face->texinfo->texturenum].height;
		}
	}

	//Builds the surface list plus the vertex and index arrays from the faces lump.
	//dface_t/dedge_t: on-disk face and edge layouts of the bsp variant being loaded.
	//in:       start of the faces lump.
	//lumpsize: size of the lump in bytes, must be a whole number of dface_t.
	//Every face gets its own run of vertices (one per edge reference) and is indexed as a triangle fan.
	//Reads loadtemp.edgerefs/edges/verts, textures and texinfo, so those must be loaded first.
	//shapes[].numidx is accumulated into rather than assigned, so it is expected to start at 0.
	template <typename dface_t, typename dedge_t> void LoadFaces(dface_t *in, int lumpsize)
	{
		unsigned int i, e;
		signed int se;
		msurf_t *out;
		unsigned int vn;
		vbodata_s *vert;
		unsigned int *idx;

		if (lumpsize % sizeof(*in))
			Sys_Error("Funny size of faces lump");
		numsurfs = lumpsize / sizeof(*in);

		//first pass: count the verts+indexes needed in total, and the indexes needed by each shape.
		out = surfs = new msurf_t[numsurfs];
		for (i = 0, numverts = 0, numidx = 0; i < numsurfs; i++, in++, out++)
		{
			assert(in->texinfo_id < numtexinfo);
			//with fewer than 2 edges the (count-2)*3 index counts below would go negative.
			assert(in->edgeref_count >= 2);
			out->texinfo = &texinfo[in->texinfo_id];
			out->multiinstance = textures[out->texinfo->texturenum].shape;

			numverts += in->edgeref_count;
			numidx += (in->edgeref_count-2)*3;
			shapes[out->multiinstance].numidx += (in->edgeref_count-2)*3;
		}

		//each shape's index range starts where the previous one ended.
		for (i = 0; i < numshapes; i++)
		{
			if (i)
				shapes[i].firstidx = shapes[i-1].firstidx + shapes[i-1].numidx;
			else
				shapes[i].firstidx = 0;
		}

		vert = vbodata = new vbodata_s[numverts];
		idx = idxdata = new unsigned int[numidx];

		numlightmaps = 0;
		lms = NULL;

		memset(vbodata, 0, sizeof(*vbodata)*numverts);

		//second pass: rewind to the first face and generate the actual data.
		for (i = 0, out = surfs, in -= numsurfs; i < numsurfs; i++, in++, out++)
		{
			out->firstidx = idx;
			//subdivide the polygon into a triangle fan
			for (e = 0; e < (unsigned)(in->edgeref_count-2); e++)
			{
				*idx++ = vert-vbodata + e+2;
				*idx++ = vert-vbodata + e+1;
				*idx++ = vert-vbodata + 0;
			}
			out->numidx = idx - out->firstidx;

			//the face's run of edge references must sit entirely inside the edgerefs lump (written to avoid wrapping).
			assert((unsigned int)in->edgeref_first <= loadtemp.numedgerefs &&
				(unsigned int)in->edgeref_count <= loadtemp.numedgerefs - (unsigned int)in->edgeref_first);

			for (e = 0; e < in->edgeref_count; e++)
			{
				//a negative reference means the edge is walked backwards, so its other vertex is used.
				se = loadtemp.edgerefs[in->edgeref_first+e];
				bool neg = se<0;
				if (neg)
					se = -se;
				//NOTE(review): this bounds the edge index by the number of edge references - the edge count looks like the intended limit, confirm.
				assert(se>0 && (unsigned int)se<loadtemp.numedgerefs);

				vn = ((dedge_t*)loadtemp.edges)[se].verts[neg];
				assert(vn<loadtemp.numverts);

				vert[e].org[0] = loadtemp.verts[vn].pos[0];
				vert[e].org[1] = loadtemp.verts[vn].pos[1];
				vert[e].org[2] = loadtemp.verts[vn].pos[2];
			}

			CalcFaceLighting(out, vert, in->edgeref_count, in->lightofs, in->lightstyle);
			vert+=in->edgeref_count;
		}
	}

	//Remembers where the face-reference lump is (no copy is made) and how many entries it holds.
	//surfref_t is the on-disk index type; lumpsize is in bytes.
	template <typename surfref_t> void LoadSurfaceReferences(surfref_t *in, unsigned int lumpsize)
	{
		const size_t entrysize = sizeof(surfref_t);
		if ((lumpsize % entrysize) != 0)
			Sys_Error("Funny size of face references lump");

		loadtemp.facerefs = in;
		loadtemp.numfacerefs = lumpsize/entrysize;
	}

	//Remembers where the planes lump is (no copy is made) and how many planes it holds.
	//lumpsize is in bytes.
	void LoadPlanes(bsp_plane_t *in, unsigned int lumpsize)
	{
		const size_t entrysize = sizeof(bsp_plane_t);
		if ((lumpsize % entrysize) != 0)
			Sys_Error("Funny size of planes lump");

		loadtemp.planes = in;
		loadtemp.numplanes = lumpsize/entrysize;
	}

	//Takes a private copy of the visibility lump, as leaves keep pointers into it.
	//lumpsize is in bytes and is remembered for validating per-leaf offsets.
	void LoadVisibility(unsigned char *in, unsigned int lumpsize)
	{
		unsigned char *viscopy = new unsigned char[lumpsize];
		memcpy(viscopy, in, lumpsize);

		pvs = viscopy;
		pvslumpsize = lumpsize;
	}

#ifdef USEBINDLESS
	//Recursively stores each node's parent pointer, starting from node (whose own parent is set to parent).
	//Recursion stops at anything with non-zero contents, which is treated as a leaf.
	void CalcNodeParents(struct rnode_s *node, struct rnode_s *parent)
	{
		node->parent = parent;
		if (!node->contents)
		{	//interior node, so walk both sides
			for (int side = 0; side < 2; side++)
				CalcNodeParents(node->child[side], node);
		}
	}
#endif
	template <typename bsp_model_t> void LoadSubmodels2(bsp_model_t *in, int lumpsize)
	{
		numsubmodels = lumpsize / sizeof(*in);
		submodel *out;
#ifdef USEBINDLESS
		bool calcparents = !!submodels;
#endif
		if (!submodels)
			submodels = new submodel[numsubmodels];
		out = submodels;
		assert(numsubmodels > 0);

		for (unsigned int i = 0; i < numsubmodels; i++, out++, in++)
		{
			out->cullradius = 4096;	//FIXME: use some proper culling

			out->face_first = in->face_first;
			out->face_count = in->face_count;
			out->visleafs = in->visleafs;
			out->rootnode = nodes + in->node[0];
			out->owner = this;
#ifdef USEGL
			out->vao = 0;
#endif
#ifdef USEVK
			memset(&out->ebo, 0, sizeof(out->ebo));
#endif
		}

#ifdef USEBINDLESS
		if (calcparents)
			for (unsigned int i = 0; i < numsubmodels; i++)
				CalcNodeParents(submodels[i].rootnode, NULL);
#endif
	}
	//Loads the models lump, guessing whether it uses the quake or the hexen2 record layout.
	void LoadSubmodels(void *ptr, int lumpsize)
	{
		bsp_model_quake_t *in = (bsp_model_quake_t*)ptr;

		//hexen2 bsps overflow with unused hulls, resulting in the world having no faces when read as quake-format.
		bool ishexen2 = (lumpsize > sizeof(*in) && !in->face_count && !(lumpsize % sizeof(bsp_model_hexen2_t)));
		//the lumpsize might be 'wrong' too, which is a stronger hint but not always present.
		if (!ishexen2)
			ishexen2 = (lumpsize % sizeof(bsp_model_quake_t)) != 0;

		if (ishexen2)
			LoadSubmodels2((bsp_model_hexen2_t *)in, lumpsize);
		else
			LoadSubmodels2((bsp_model_quake_t *)in, lumpsize);
	}


	template <typename dleaf_t, typename faceref_t> void LoadLeaves(dleaf_t *in, unsigned int lumpsize)
	{
		struct rleaf_s *out;
		unsigned int i, j;
		numleaves = lumpsize/sizeof(*in);
		pvsrowbytes = (numleaves+7)/8;
		out = leaves = new struct rleaf_s[numleaves];

		for (i = 0; i < numleaves; i++, out++, in++)
		{
			out->maxs[0] = in->maxs[0];
			out->maxs[1] = in->maxs[1];
			out->maxs[2] = in->maxs[2];

			out->mins[0] = in->mins[0];
			out->mins[1] = in->mins[1];
			out->mins[2] = in->mins[2];

			if (in->vis_offset < 0 || in->vis_offset >= pvslumpsize)
				out->vis = NULL;
			else
				out->vis = pvs + in->vis_offset;

#ifdef USEBINDLESS
			out->visframe = visframe;
#endif
			out->contents = in->type;
			out->numsurfs = in->faceref_count;
			out->child[0] = 0;
			out->child[1] = 0;
			out->surf = new msurf_t*[out->numsurfs];  
			assert ((unsigned int)in->faceref_first+in->faceref_count <= loadtemp.numfacerefs);
			for (j = 0; j < in->faceref_count; j++)
				out->surf[j] = &surfs[((faceref_t*)loadtemp.facerefs)[in->faceref_first+j]];

#ifdef USEBINDLESS
			out->parent = NULL;
#endif
		}
		leaves[0].vis = NULL;
	}
	template <typename dnode_t> void LoadNodes(dnode_t *in, unsigned int lumpsize)
	{
		unsigned int i, j;
		struct rnode_s *out;
		loadtemp.numnodes = lumpsize/sizeof(*in);

		out = nodes = new struct rnode_s[loadtemp.numnodes];

		for (i = 0; i < loadtemp.numnodes; i++, in++, out++)
		{
			out->maxs[0] = in->maxs[0];
			out->maxs[1] = in->maxs[1];
			out->maxs[2] = in->maxs[2];

			out->mins[0] = in->mins[0];
			out->mins[1] = in->mins[1];
			out->mins[2] = in->mins[2];

			out->planenormal = loadtemp.planes[in->plane_id].normal;
			out->planedist = loadtemp.planes[in->plane_id].dist;
			out->contents = 0;
			for (j = 0; j < 2; j++)
			{
				if (in->children[j]<0)
				{
					unsigned int ch = -(in->children[j]+1);
					assert(ch<numleaves);
					out->child[j] = &leaves[ch];
				}
				else
				{
					unsigned int ch = in->children[j];
					assert(ch<loadtemp.numnodes);
					out->child[j] = &nodes[ch];
				}
			}

			out->numsurfs = in->face_count;
			out->surf = new msurf_t*[out->numsurfs];
			for (j = 0; j < in->face_count; j++)
				out->surf[j] = &surfs[in->face_first+j];

#ifdef USEBINDLESS
			out->parent = NULL;
#endif
		}
	}

public:
	//Loads a version-29 bsp from an in-memory file image and returns the new model.
	//name and length are currently only referenced by the disabled debug print.
	static model_c *LoadModel29(const char *name, unsigned char *filedata, unsigned int length)
	{
		CheckGLError();
//		Com_Printf(PRINT_NOTIFCATION, "BSP29 %s %u bytes\n", name, length);
		bsp_header_t *file = (bsp_header_t *)filedata;
		bspmodel *bm = new bspmodel();

		//each lump is found via the offset+size pair in the header's lump table.
		//the order matters: later loaders read what the earlier ones stored.
		bm->LoadVerticies((bsp_vertex_t*)(filedata + file->lumps[LUMP_VERTICIES].offset), file->lumps[LUMP_VERTICIES].size);
		bm->LoadEdges<bsp29_edge_t>((bsp29_edge_t*)(filedata + file->lumps[LUMP_EDGES].offset), file->lumps[LUMP_EDGES].size);
		bm->LoadSurfEdges((signed int*)(filedata + file->lumps[LUMP_EDGEREFS].offset), file->lumps[LUMP_EDGEREFS].size);
		bm->LoadTextures((bsp_miplump_t*)(filedata + file->lumps[LUMP_MIPTEX].offset), file->lumps[LUMP_MIPTEX].size);
		bm->LoadTexinfo((bsp_texinfo_t*)(filedata + file->lumps[LUMP_TEXINFO].offset), file->lumps[LUMP_TEXINFO].size);
		bm->LoadLighting((unsigned char*)(filedata + file->lumps[LUMP_LIGHTMAP].offset), file->lumps[LUMP_LIGHTMAP].size);
		//yes, twice. we're evil. the nodes are not loaded yet at this point, the second call below comes after them.
		bm->LoadSubmodels((void*)(filedata + file->lumps[LUMP_MODELS].offset), file->lumps[LUMP_MODELS].size);
		bm->LoadFaces<bsp29_face_t,bsp29_edge_t>((bsp29_face_t*)(filedata + file->lumps[LUMP_FACES].offset), file->lumps[LUMP_FACES].size);
		bm->LoadSurfaceReferences<unsigned short>((unsigned short*)(filedata + file->lumps[LUMP_FACEREFS].offset), file->lumps[LUMP_FACEREFS].size);
		bm->LoadPlanes((bsp_plane_t*)(filedata + file->lumps[LUMP_PLANES].offset), file->lumps[LUMP_PLANES].size);
		bm->LoadVisibility((unsigned char*)(filedata + file->lumps[LUMP_VIS].offset), file->lumps[LUMP_VIS].size);
		bm->LoadLeaves<bsp29_leaf_t,unsigned short>((bsp29_leaf_t*)(filedata + file->lumps[LUMP_LEAVES].offset), file->lumps[LUMP_LEAVES].size);
		bm->LoadNodes<bsp29_node_t>((bsp29_node_t*)(filedata + file->lumps[LUMP_NODES].offset), file->lumps[LUMP_NODES].size);
		bm->LoadSubmodels((void*)(filedata + file->lumps[LUMP_MODELS].offset), file->lumps[LUMP_MODELS].size);

		//gpu-side setup, checking for errors between each step.
		CheckGLError();
		bm->LoadShader();
		CheckGLError();
		bm->LoadMainVAO();
		CheckGLError();
		bm->LoadLightmapArray();
		CheckGLError();

		//the load-time scratch info is no longer needed. deleting NULL is harmless.
		//NOTE(review): miptexinfo is indexed like an array elsewhere - confirm whether this should be delete[].
		delete bm->loadtemp.miptexinfo;
		memset(&bm->loadtemp, 0, sizeof(bm->loadtemp));

		return bm;
	}
/*	static model_c *LoadModelHL(const char *name, unsigned char *filedata, unsigned int length)
	{
		//halflife has:
		//a) different (4) hulls
		//b) rgb lighting rather than mono
		//c) palette info embedded within textures.
		//d) often external mips.
		CheckGLError();
		Com_Printf(PRINT_NOTIFCATION, "HLBSP %s %u bytes\n", name, length);
		bspmodel *bm;
		bsp_header_t *file = (bsp_header_t *)filedata;

		bm = new bspmodel();

		bm->LoadVerticies							((bsp_vertex_t*)	((unsigned char*)file + file->lumps[LUMP_VERTICIES	].offset), file->lumps[LUMP_VERTICIES	].size);
		bm->LoadEdges<bsp29_edge_t>					((bsp29_edge_t*)	((unsigned char*)file + file->lumps[LUMP_EDGES		].offset), file->lumps[LUMP_EDGES		].size);
		bm->LoadSurfEdges							((signed int*)		((unsigned char*)file + file->lumps[LUMP_EDGEREFS	].offset), file->lumps[LUMP_EDGEREFS	].size);
		bm->LoadTextures							((bsp_miplump_t*)	((unsigned char*)file + file->lumps[LUMP_MIPTEX		].offset), file->lumps[LUMP_MIPTEX		].size);
		bm->LoadTexinfo								((bsp_texinfo_t*)	((unsigned char*)file + file->lumps[LUMP_TEXINFO	].offset), file->lumps[LUMP_TEXINFO		].size);

		bm->lightmapsrc = NULL;
		bm->lightmapsize = 0;
//		bm->LoadLighting							((unsigned char*)	((unsigned char*)file + file->lumps[LUMP_LIGHTMAP	].offset), file->lumps[LUMP_LIGHTMAP	].size);

		bm->LoadSubmodels							((void*)			((unsigned char*)file + file->lumps[LUMP_MODELS		].offset), file->lumps[LUMP_MODELS		].size);	//yes, twice. we're evil.
		bm->LoadFaces<bsp29_face_t,bsp29_edge_t>	((bsp29_face_t*)	((unsigned char*)file + file->lumps[LUMP_FACES		].offset), file->lumps[LUMP_FACES		].size);
		bm->LoadSurfaceReferences<unsigned short>	((unsigned short*)	((unsigned char*)file + file->lumps[LUMP_FACEREFS	].offset), file->lumps[LUMP_FACEREFS	].size);
		bm->LoadPlanes								((bsp_plane_t*)		((unsigned char*)file + file->lumps[LUMP_PLANES		].offset), file->lumps[LUMP_PLANES		].size);
		bm->LoadVisibility							((unsigned char*)	((unsigned char*)file + file->lumps[LUMP_VIS		].offset), file->lumps[LUMP_VIS			].size);
		bm->LoadLeaves<bsp29_leaf_t,unsigned short>	((bsp29_leaf_t*)	((unsigned char*)file + file->lumps[LUMP_LEAVES		].offset), file->lumps[LUMP_LEAVES		].size);
		bm->LoadNodes<bsp29_node_t>					((bsp29_node_t*)	((unsigned char*)file + file->lumps[LUMP_NODES		].offset), file->lumps[LUMP_NODES		].size);
		bm->LoadSubmodels							((void*)			((unsigned char*)file + file->lumps[LUMP_MODELS		].offset), file->lumps[LUMP_MODELS		].size);

		CheckGLError();
		bm->LoadShader();
		CheckGLError();
		bm->LoadMainVAO();
		CheckGLError();
		bm->LoadLightmapArray();
		CheckGLError();

		if (bm->loadtemp.miptexinfo)
			delete bm->loadtemp.miptexinfo;

		memset(&bm->loadtemp, 0, sizeof(bm->loadtemp));

		return bm;
	}
*/	static model_c *LoadModel2(const char *name, unsigned char *filedata, unsigned int length)
	{
		//Loads a 'BSP2' format bsp from an in-memory file image and returns the new model.
		//name and length are currently only referenced by the disabled debug print.
		CheckGLError();
//		Com_Printf(PRINT_NOTIFCATION, "BSP2 %s %u bytes\n", name, length);
		bsp_header_t *file = (bsp_header_t *)filedata;
		bspmodel *bm = new bspmodel();

		//each lump is found via the offset+size pair in the header's lump table.
		//the order matters: later loaders read what the earlier ones stored.
		bm->LoadVerticies((bsp_vertex_t*)(filedata + file->lumps[LUMP_VERTICIES].offset), file->lumps[LUMP_VERTICIES].size);
		bm->LoadEdges((bsp2_edge_t*)(filedata + file->lumps[LUMP_EDGES].offset), file->lumps[LUMP_EDGES].size);
		bm->LoadSurfEdges((signed int*)(filedata + file->lumps[LUMP_EDGEREFS].offset), file->lumps[LUMP_EDGEREFS].size);
		bm->LoadTextures((bsp_miplump_t*)(filedata + file->lumps[LUMP_MIPTEX].offset), file->lumps[LUMP_MIPTEX].size);
		bm->LoadTexinfo((bsp_texinfo_t*)(filedata + file->lumps[LUMP_TEXINFO].offset), file->lumps[LUMP_TEXINFO].size);
		bm->LoadLighting((unsigned char*)(filedata + file->lumps[LUMP_LIGHTMAP].offset), file->lumps[LUMP_LIGHTMAP].size);
		//yes, twice. we're evil. the nodes are not loaded yet at this point, the second call below comes after them.
		bm->LoadSubmodels((void*)(filedata + file->lumps[LUMP_MODELS].offset), file->lumps[LUMP_MODELS].size);
		bm->LoadFaces<bsp2_face_t,bsp2_edge_t>((bsp2_face_t*)(filedata + file->lumps[LUMP_FACES].offset), file->lumps[LUMP_FACES].size);
		bm->LoadSurfaceReferences((unsigned int*)(filedata + file->lumps[LUMP_FACEREFS].offset), file->lumps[LUMP_FACEREFS].size);
		bm->LoadPlanes((bsp_plane_t*)(filedata + file->lumps[LUMP_PLANES].offset), file->lumps[LUMP_PLANES].size);
		bm->LoadVisibility((unsigned char*)(filedata + file->lumps[LUMP_VIS].offset), file->lumps[LUMP_VIS].size);
		bm->LoadLeaves<bsp2_leaf_t,unsigned int>((bsp2_leaf_t*)(filedata + file->lumps[LUMP_LEAVES].offset), file->lumps[LUMP_LEAVES].size);
		bm->LoadNodes((bsp2_node_t*)(filedata + file->lumps[LUMP_NODES].offset), file->lumps[LUMP_NODES].size);
		bm->LoadSubmodels((void*)(filedata + file->lumps[LUMP_MODELS].offset), file->lumps[LUMP_MODELS].size);

		//gpu-side setup, checking for errors between each step.
		CheckGLError();
		bm->LoadShader();
		CheckGLError();
		bm->LoadMainVAO();
		CheckGLError();
		bm->LoadLightmapArray();
		CheckGLError();

		//the load-time scratch info is no longer needed. deleting NULL is harmless.
		//NOTE(review): miptexinfo is indexed like an array elsewhere - confirm whether this should be delete[].
		delete bm->loadtemp.miptexinfo;
		memset(&bm->loadtemp, 0, sizeof(bm->loadtemp));

		return bm;
	}
};

//Returns a short human-readable description of a world rendering mode.
//Unrecognised mode numbers get the same description as MODE_PVSONLY_MULTI.
const char *GetBSPModeDesc(int modenum)
{
	if (modenum == MODE_WHOLEWORLD_MULTI)
		return "no culling, multidrawindirect";

#ifdef USEBINDLESS
	if (modenum == MODE_PVSONLY_NV)
		return "PVS only, onedraw (nv)";
	if (modenum == MODE_FRUSTUM_NV)
		return "PVS+Frustum, onedraw (nv)";
	if (modenum == MODE_WHOLEWORLD_NV)
		return "no culling, onedraw";

	//MODE_PVSONLY_MULTI, and anything unknown
	return "PVS only, multidrawindirect";
#else
	//MODE_PVSONLY_MULTI, and anything unknown
	return "PVS only, multiple draws";
#endif
}

//File-scope modeltype objects, each pairing a format id with one of bspmodel's static loaders and a short name.
//NOTE(review): presumably the modeltype constructor registers the loader against the file's leading 32bit id - confirm.

//disabled: id 30. it names bspmodel::LoadModel30, while the commented-out loader inside the class is called LoadModelHL.
//modeltype q1bsphl(30, bspmodel::LoadModel30, "hlbsp");
//id 29, handled by LoadModel29.
modeltype q1bsp29(29, bspmodel::LoadModel29, "q1bsp");
//the characters 'B','S','P','2' packed into an int with 'B' in the lowest byte, handled by LoadModel2.
modeltype q1bsp2(('B'<<0)|('S'<<8)|('P'<<16)|('2'<<24), bspmodel::LoadModel2, "q1bsp2");
