User:Kmcguire
User Pages
Reading CLR PE32 File
Useful Defines, External API, References
/*
    Access the unsigned 8, 16 or 32 bit value stored at address x.
    The expansion is an lvalue, so it can be read or assigned. The value
    is accessed directly through a pointer of the target type, i.e. in
    the byte order of the machine running the code.
*/
#define UINT8_AT(x) (((uint8_t*)(x))[0])
#define UINT16_AT(x) (((uint16_t*)(x))[0])
#define UINT32_AT(x) (((uint32_t*)(x))[0])
Also the memcpy, memset, malloc, strcmp, and free from the C standard library are used. I also use printf but only for diagnostic purposes.
Also, the page here has been very useful: http://ntcore.com/files/dotnetformat.htm.
Portable Executable 32 File Header And NT Header Structure
/*
    Overlay for the headers that start at the "PE\0\0" signature of a
    PE32 file. cilvm_pe32_buffer casts the file bytes at that position
    to this structure, so the member order and sizes must not change.
    The fixed part is 120 bytes; tables[] is a flexible array member and
    is not counted by sizeof().
*/
typedef struct {
uint32_t signature; /* the four bytes "PE\0\0" */
uint16_t cputype;
uint16_t objcnt; /* number of object (section) headers that follow the headers */
uint32_t tdstamp;
uint32_t reserved1;
uint32_t reserved2;
uint16_t nthdrsize;
uint16_t flags;
uint16_t reserved3;
uint16_t lmajor;
uint16_t lminor;
uint16_t reserved4;
uint32_t reserved5;
uint32_t reserved6;
uint32_t entrypointrva;
uint32_t reserved7;
uint32_t reserved8;
uint32_t imagebase;
uint32_t objectalign;
uint32_t filealign;
uint16_t osmajor;
uint16_t osminor;
uint16_t usermajor;
uint16_t userminor;
uint16_t subsysmajor;
uint16_t subsysminor;
uint32_t reserved9;
uint32_t imagesize;
uint32_t hdrsize;
uint32_t filechecksum;
uint16_t subsystem;
uint16_t dllflags;
uint32_t stackreservesize;
uint32_t stackcommitsize;
uint32_t heapreservesize;
uint32_t heapcommitsize;
uint32_t reserved10;
uint32_t _rvasizes; /* number of 8 byte entries in tables[] */
struct {
uint32_t rva;
uint32_t size;
} tables[]; /* indexed with the WIN32_PE32_TABLE_* values */
} win32_pe32_fhdr;
/*
    Indexes into win32_pe32_fhdr.tables[]. Only the CLRRTHDR entry is
    used by the code on this page.
    NOTE(review): the UNKNOWN1..3 slots presumably correspond to the
    load-config, bound-import and delay-import directories of the
    PE/COFF specification - confirm before renaming.
*/
#define WIN32_PE32_TABLE_EXPORT 0x00
#define WIN32_PE32_TABLE_IMPORT 0x01
#define WIN32_PE32_TABLE_RESOURCE 0x02
#define WIN32_PE32_TABLE_EXCEPTION 0x03
#define WIN32_PE32_TABLE_SECURITY 0x04
#define WIN32_PE32_TABLE_FIXUP 0x05
#define WIN32_PE32_TABLE_DEBUG 0x06
#define WIN32_PE32_TABLE_IMAGEDESC 0x07
#define WIN32_PE32_TABLE_MACHSPEC 0x08
#define WIN32_PE32_TABLE_THREADLOC 0x09
#define WIN32_PE32_TABLE_UNKNOWN1 0x0A
#define WIN32_PE32_TABLE_UNKNOWN2 0x0B
#define WIN32_PE32_TABLE_IMPORTADDR 0x0C
#define WIN32_PE32_TABLE_UNKNOWN3 0x0D
#define WIN32_PE32_TABLE_CLRRTHDR 0x0E /* locates the CLR run-time header (win32_clr20_hdr) */
Common Language Runtime Header
/*
    An (rva, size) pair as it is stored inside the CLR run-time header.
    Each member of win32_clr20_hdr that refers to a block of data
    elsewhere in the image uses this type.
*/
typedef struct {
uint32_t rva; /* relative virtual address of the data */
uint32_t size; /* size of the data in bytes */
} win32_clr20_datadir;
/* http://ntcore.com/files/dotnetformat.htm */
/*
    Overlay for the CLR run-time header. cilvm_pe32_buffer finds it
    through the WIN32_PE32_TABLE_CLRRTHDR data directory entry and casts
    the file bytes to this structure, so the layout must not change.
    Only the meta member is read by the code on this page.
*/
typedef struct {
uint32_t cb;
uint16_t rtmajor;
uint16_t rtminor;
win32_clr20_datadir meta; /* location of the meta-data (starts with the meta-data signature) */
uint32_t flags;
uint32_t entrypointrva; /* entry point rva */
win32_clr20_datadir resources; /* resources */
win32_clr20_datadir strongnamsig; /* strong name signature */
win32_clr20_datadir codemantab; /* code manager table */
win32_clr20_datadir vtabfixups; /* vtable fixups */
win32_clr20_datadir exaddrtabjumps; /* export address table jumps */
} win32_clr20_hdr;
CLR Meta-Data Container Structures
These are custom structures that I made to hold the data as I read and process it. They are not defined inside the PE32 file nor by the standard. These structures are just a natural and hopefully simple way to hold and access the data.
The #~ stream (win_clr20_meta_stream) is converted into an array of tables (win_clr20_meta_table). I am not sure why I prefixed them with "win_" I would remove it if I did it over, but for now I will just leave it there.
The win_clr20_module is the main structure which also holds the win_clr20_meta structure. In most of my functions I like to hold on to either the meta or module structure since I can access anything I need.
typedef struct {
/*
One stream of the meta-data. The streams that
cilvm_clr_meta_read recognises by name are:
#~
#US
#Strings
#Blob
#GUID
Every stream found in the file, recognised or not,
is stored in one of these structures.
*/
uint8_t *name; /* ASCII name, zero terminated, e.g. "#~" */
uint8_t *data; /* if non-zero is buffer (a private copy of the stream bytes) */
uint32_t size; /* size in bytes */
} win_clr20_meta_stream;
typedef struct {
    /*
        This structure holds one table once it has been
        read from the meta-data #~ stream. The rows are
        kept in one flat buffer, each row being rsize
        bytes long. You can get the size of each row
        (which is constant), the number of fields in a
        row and the size of each field. However, there
        is no naming per field; use the CLR20_T*_ field
        index constants for that.

        fcnt is appended as the last member so that the
        position of the older members does not change.
    */
    uint32_t rowcnt;    /* number of rows in the table */
    uint8_t rsize;      /* row size in bytes */
    uint8_t *fsize;     /* fcnt entries: size of each field from left to right in bytes */
    uint8_t *rows;      /* rowcnt * rsize bytes of row data, row zero first */
    uint32_t fcnt;      /* number of fields in a row (entries in fsize) */
} win_clr20_meta_table;
typedef struct {
/*
This is the main structure for the meta-data. It
should allow access to other information from this
structure. The stream members are filled in by
cilvm_clr_meta_read; heapoffsetsizes, valid, sorted
and tables are filled in by cilvm_clr_meta_tread.
*/
uint16_t strmcnt; /* the number of array items below */
win_clr20_meta_stream *strms; /* an array just like they are read in */
win_clr20_meta_stream strmbyid[5]; /* copy of strms, but indexed by WIN_CLR20_META_ (shares the name/data pointers) */
uint8_t heapoffsetsizes; /* heap offset sizes, test with CLR20_META_HEAPOFFSETSIZES_ */
uint32_t valid[2]; /* qword(64bit) tells which tables are present; [0] holds tables 0-31, [1] holds 32-63 */
uint32_t sorted[2]; /* qword(64bit) tells which tables are sorted; split like valid */
win_clr20_meta_table tables[64]; /* all the known tables that are supported, indexed by table ID */
} win_clr20_meta;
/*
    A quick bit guide to find the right one.
    These are bit masks for win_clr20_meta.heapoffsetsizes. When a bit
    is set the indexes into that heap are 32 bits wide instead of 16.
    Each mask must be a single distinct bit: the blob heap is bit two
    (0x04), not the value three, which would also match the string and
    GUID bits.
*/
#define CLR20_META_HEAPOFFSETSIZES_STRING 0x01
#define CLR20_META_HEAPOFFSETSIZES_GUID 0x02
#define CLR20_META_HEAPOFFSETSIZES_BLOB 0x04
/*
The identifier for each stream. This is
specific to win_clr20_meta. These values
are internal to this code.
*/
#define WIN_CLR20_META_TABLES 0x00
#define WIN_CLR20_META_USTRINGS 0x01
#define WIN_CLR20_META_STRINGS 0x02
#define WIN_CLR20_META_BLOB 0x03
#define WIN_CLR20_META_GUID 0x04
/*
    A private copy of one object (section) of the PE32 file, made by
    cilvm_pe32_buffer.
*/
typedef struct {
uintptr_t rva; /* relative virtual address of the section */
uintptr_t size; /* number of bytes in data (the size of the section in the file) */
uint8_t *data; /* allocated copy of the section bytes */
} win_clr20_chunk;
typedef struct {
/*
A container for the meta-data and
for any other structures.
Also included are the object sections
like .text, .reloc, etc. These are kept
because the meta-data will reference
into these sections. So we need to keep
them close, but they are not actual
meta-data so they are outside of the
meta structure.
*/
win_clr20_meta meta;
uint32_t chunkcnt; /* number of entries in chunks */
win_clr20_chunk *chunks; /* one entry per object (section), in file order */
} win_clr20_module;
Accessing CLR Header Through PE32 Headers
Here, I access the CLR header by walking the PE32 file header and optional header. I also have to peek into the data directories. The file sections (.text, ...) are copied into what I named chunks, which are saved and act just like sections, just under a different name. The CLR meta-data will point into these chunks at a later time.
int cilvm_pe32_buffer(uint8_t *buffer, uint32_t len, win_clr20_module *clrmodule)
/*
Will read a CLR module in the PE32 format into a memory structure.
*/
{
win32_pe32_fhdr *fhdr;
win32_pe32_ohdr *ohdr;
uint32_t pe_off;
uint32_t opt_off;
uint16_t opt_magic;
win_clr20_hdr *clr_hdr;
uint32_t clr_rva;
uint32_t clr_size;
uint32_t clr_off;
uint32_t meta_off;
uint32_t meta_rva;
uint32_t meta_size;
uint32_t x;
uint32_t y;
uint8_t *tb;
/* get pe32 header offset */
pe_off = ((uint32_t*)(buffer + 0x3c))[0];
printf("peoff:%x\n", pe_off);
if(memcmp(buffer + pe_off, "PE\0\0", 4) == 0)
{
printf("Yes, this is a PE header.\n");
}
fhdr = (win32_pe32_fhdr*)(buffer + pe_off);
printf("imagebase: %x\n", fhdr->imagebase);
/* get clr runtime header */
clr_rva = fhdr->tables[WIN32_PE32_TABLE_CLRRTHDR].rva;
clr_size = fhdr->tables[WIN32_PE32_TABLE_CLRRTHDR].size;
printf("CLRRTHDR_RVA:%x\nCLRRTHDR_SIZE:%x\n", clr_rva, clr_size);
/*
The array of objects follow the headers. But, my header does
not include the size for the data dictionary array. So to
include that we get the number of entries in the data dictionary
and multiple this by eight since each one is eight bytes in PE32.
*/
ohdr = (win32_pe32_ohdr*)(buffer + pe_off + sizeof(win32_pe32_fhdr) + (fhdr->_rvasizes * 8));
/*
I also copy each section into a chunk. I used a different name incase
we start reading from stuff other than PE32 files and the word chunk
seemed cool to use.
*/
clrmodule->chunkcnt = fhdr->objcnt;
clrmodule->chunks = (win_clr20_chunk*)malloc(sizeof(win_clr20_chunk) * fhdr->objcnt);
for(x = 0; x < fhdr->objcnt; ++x)
{
clrmodule->chunks[x].rva = ohdr[x].rva;
clrmodule->chunks[x].size = ohdr[x].psize;
clrmodule->chunks[x].data = (uint8_t*)malloc(ohdr[x].psize);
memcpy(clrmodule->chunks[x].data, buffer + ohdr[x].poff, ohdr[x].psize);
}
for(x = 0; x < fhdr->objcnt; ++x)
{
printf("object rva %x %s\n", ohdr[x].rva, &ohdr[x].name[0]);
if(clr_rva >= ohdr[x].rva)
{
/* calculating relative to file offset not memory */
clr_off = ohdr[x].poff + (clr_rva - ohdr[x].rva);
printf("found object for CLRRTHDR, with rva %x\n", ohdr[x].rva);
break;
}
}
clr_hdr = (win_clr20_hdr*)(buffer + clr_off);
/* try to read in the meta data */
printf("clr-metatable:%x [size:%x]\n", clr_hdr->meta.rva, clr_hdr->meta.size);
meta_rva = clr_hdr->meta.rva;
meta_size = clr_hdr->meta.size;
/* find the object that the metadata is in and point header there */
for(x = 0; x < fhdr->objcnt; ++x)
{
printf("object rva %x %s\n", ohdr[x].rva, &ohdr[x].name[0]);
if(meta_rva >= ohdr[x].rva)
{
/* calculating relative to file offset not memory */
meta_off = ohdr[x].poff + (meta_rva - ohdr[x].rva);
printf("found object for CLR-METADATA, with rva %x\n", ohdr[x].rva);
break;
}
}
/* call routine to read meta-data header */
cilvm_clr_meta_read(buffer + meta_off, meta_size, &clrmodule->meta);
/* after reading in the meta-data we need to process it into a structure */
cilvm_clr_meta_tread(&clrmodule->meta);
return 1;
}
The cilvm_clr_meta_read and cilvm_clr_meta_tread functions are not yet shown, but they each perform a small part in the eventual construction of a high level structure where the common intermediate language instructions can be accessed and processed.
Read CLR Meta-Data Streams Into Memory
int cilvm_clr_meta_read(uint8_t *buffer, uint32_t len, win_clr20_meta *meta)
/*
    This will read the meta-data streams.

    buffer  points at the meta-data signature
    len     number of bytes of meta-data
    meta    receives a private copy of every stream (name, data and
            size) in meta->strms, and a copy of each recognised stream
            in meta->strmbyid

    Entries of meta->strmbyid whose stream is not present are left
    zeroed. Returns 1 on success. Returns 0 on a bad signature, on
    data that runs past len, or when memory runs out; in that case
    everything allocated here is released again.
*/
{
    static const struct {
        const char *name;
        uint32_t id;
    } known[] = {
        { "#~", WIN_CLR20_META_TABLES },
        { "#US", WIN_CLR20_META_USTRINGS },
        { "#Strings", WIN_CLR20_META_STRINGS },
        { "#Blob", WIN_CLR20_META_BLOB },
        { "#GUID", WIN_CLR20_META_GUID }
    };
    uint16_t ver_major;
    uint16_t ver_minor;
    uint32_t ver_strlen;
    uint16_t strmcnt;
    uint32_t pos;       /* read position relative to buffer */
    uint32_t x;
    uint32_t y;
    uint32_t k;
    uint32_t strm_off;
    uint32_t strm_size;
    win_clr20_meta_stream *strm;

    /* signature, version, reserved and version string length */
    if(buffer == NULL || meta == NULL || len < 16)
    {
        return 0;
    }
    meta->strmcnt = 0;
    meta->strms = NULL;
    memset(meta->strmbyid, 0, sizeof(meta->strmbyid));
    if(UINT32_AT(buffer) != 0x424A5342)
    {
        printf("invalid clr metadata signature\n");
        return 0;
    }
    ver_major = UINT16_AT(buffer + 4 + 0);
    ver_minor = UINT16_AT(buffer + 4 + 2);
    printf("meta data ver:%x.%x\n", ver_major, ver_minor);
    ASSERT((ver_major == 1) && (ver_minor == 1));
    /* jump over signature, version, and reserved */
    pos = 4 + 4 + 4;
    /* display and jump over version string */
    ver_strlen = UINT32_AT(buffer + pos);
    pos += 4;
    ASSERT((ver_strlen & 0x3) == 0x0); /* has to be multiple of 4 */
    /* the string, the flags and the stream count have to fit */
    if(ver_strlen > len - pos || len - pos - ver_strlen < 4)
    {
        printf("clr metadata header is truncated\n");
        return 0;
    }
    printf("ver_strlen:%u\n", ver_strlen);
    printf("ver:%.*s\n", (int)ver_strlen, (const char*)(buffer + pos));
    pos += ver_strlen;
    /* jump over flags; 16 bit field; supposed to always be zero;
       but apparently it is not always zero oddly enough.. */
    pos += 2;
    strmcnt = UINT16_AT(buffer + pos);
    pos += 2;
    printf("MetaDataStreamCount:%u\n", strmcnt);
    /* read each metadata stream header */
    if(strmcnt != 0)
    {
        meta->strms = (win_clr20_meta_stream*)malloc(sizeof(win_clr20_meta_stream) * strmcnt);
        if(meta->strms == NULL)
        {
            return 0;
        }
        /* zeroed so that the failure path can free every entry */
        memset(meta->strms, 0, sizeof(win_clr20_meta_stream) * strmcnt);
    }
    meta->strmcnt = strmcnt;
    for(x = 0; x < strmcnt; ++x)
    {
        strm = &meta->strms[x];
        /* read offset and size */
        if(pos > len || len - pos < 8)
        {
            goto fail;
        }
        strm_off = UINT32_AT(buffer + pos);
        strm_size = UINT32_AT(buffer + pos + 4);
        pos += 4 + 4;
        /* read stream name (stop at zero byte) */
        y = 0;
        while(pos + y < len && buffer[pos + y] != 0x00)
        {
            ++y;
        }
        if(pos + y >= len)
        {
            /* ran off the end before the terminating zero */
            goto fail;
        }
        ++y;
        printf("off:%x,size:%x,name:%s\n", strm_off, strm_size, (const char*)(buffer + pos));
        if(strm_off > len || strm_size > len - strm_off)
        {
            printf("stream data lies outside of the metadata\n");
            goto fail;
        }
        /* keep the name including its terminating zero */
        strm->name = (uint8_t*)malloc(y);
        if(strm->name == NULL)
        {
            goto fail;
        }
        memcpy(strm->name, buffer + pos, y);
        /* copy the data; at least one byte so data is non-zero even for an empty stream */
        strm->data = (uint8_t*)malloc(strm_size != 0 ? strm_size : 1);
        if(strm->data == NULL)
        {
            goto fail;
        }
        memcpy(strm->data, buffer + strm_off, strm_size);
        strm->size = strm_size;
        /* advance to next offset and size pair; names are padded to a multiple of four */
        pos += (y + 3) & ~(uint32_t)3;
    }
    /* give each stream a home at a consistent index */
    for(x = 0; x < meta->strmcnt; ++x)
    {
        for(k = 0; k < sizeof(known) / sizeof(known[0]); ++k)
        {
            if(strcmp((const char*)meta->strms[x].name, known[k].name) == 0)
            {
                meta->strmbyid[known[k].id] = meta->strms[x];
            }
        }
    }
    return 1;
fail:
    for(x = 0; x < meta->strmcnt; ++x)
    {
        free(meta->strms[x].name);
        free(meta->strms[x].data);
    }
    free(meta->strms);
    meta->strms = NULL;
    meta->strmcnt = 0;
    return 0;
}
Meta-Data #~ Stream Table Field Descriptions
This allows us to write a function to interpret the table data that follows the header in the #~ meta-data stream. The indexes can vary in size by what tables they reference so we have to create a letter for each specific index which refers to specific tables so that during run-time we can calculate the index size needed.
In my opinion this was done to conserve space, but I am not sure.
/*
The best way I could think of was to create a large table
that has 64 entries. Since at most with 2.0 there can be
a maximum of 64 tables. At this time not all of these are
used. So I only populate the indexes that are for a specific
table.
What I do is describe the fields in a row for each table. Since
each table has different fields per row each entry is different.
Also, most of the fields are index types. And since the index
size can change to, apparently conserve space, I use these identifiers
so that when I actually go to use them I calculate their size before
hand. The '1', '2', and '4' are always this size.
1 - 8 bit field
2 - 16 bit field
4 - 32 bit field
S - index to String table
G - index to GUID table
R - resolution scope index (Module, ModuleRef, AssemblyRef, TypeRef)
T - TypeDefOrRef index (TypeRef, TypeDef, TypeSpec)
F - Field table index
M - MethodDef table index
B - Blob table index
P - Param table index
D - TypeDef table index (different from T)
E - MemberRefParent index (TypeRef, ModuleRef, MethodDef, TypeSpec, TypeDef)
H - HasConstant index (Param, Field, Property)
C - HasCustomAttribute index (almost any table, but never the CustomAttribute table itself)
I - CustomAttributeType index (MethodDef, MemberRef)
O - HasFieldMarshal index (Field, Param)
A - HasDeclSecurity index (TypeDef, MethodDef, Assembly)
V - Event table index
Y - Property table index
S - HasSemantics index (Event, Property)
U - MemberForwarded index (Field, MethodDef)
B - Implementation index (File, AssemblyRef, ExportedType)
X - CustomAttributeType index (MethodDef, MemberRef)
W - ResolutionScope index (Module, ModuleRef, AssemblyRef, TypeRef)
N - ModuleRef table index
J - AssemblyRef table index
K - TypeOrMethodDef index (TypeDef, MethodDef)
L - GenericParam table index
*/
/*
    This is the table's ID. It is the bit position of the table in
    the valid/sorted masks of the #~ stream and the index into
    win_clr20_meta.tables.

    There is no table called MethodRef. Where the documents talk
    about a "method reference" they mean a row of the MemberRef
    table, so CLR20_META_TMETHODREF is kept as another name for
    CLR20_META_TMEMBERREF. The MethodSpec table (generic method
    instantiations) is table 43.
*/
#define CLR20_META_TMODULE 0
#define CLR20_META_TTYPEREF 1
#define CLR20_META_TTYPEDEF 2
#define CLR20_META_TFIELD 4
#define CLR20_META_TMETHODDEF 6
#define CLR20_META_TPARAM 8
#define CLR20_META_TIFACEIMPL 9 /* interface implementation table */
#define CLR20_META_TMEMBERREF 10
#define CLR20_META_TCONSTANT 11
#define CLR20_META_TCUSTOMATTRI 12
#define CLR20_META_TFIELDMARSHAL 13
#define CLR20_META_TDECLSECURITY 14
#define CLR20_META_TCLASSLAYOUT 15
#define CLR20_META_TFIELDLAYOUT 16
#define CLR20_META_TSTANDALONGSIG 17
#define CLR20_META_TEVENTMAP 18
#define CLR20_META_TEVENT 20
#define CLR20_META_TPROPERTYMAP 21
#define CLR20_META_TPROPERTY 23
#define CLR20_META_TMETHODSEMAN 24
#define CLR20_META_TMETHODIMPL 25
#define CLR20_META_TMODULEREF 26
#define CLR20_META_TTYPESPEC 27
#define CLR20_META_TIMPLMAP 28
#define CLR20_META_TFIELDRVA 29
#define CLR20_META_TASSEMBLY 32
#define CLR20_META_TASSEMBLYPROC 33
#define CLR20_META_TASSEMBLYOS 34
#define CLR20_META_TASSEMBLYREF 35
#define CLR20_META_TASSEMBLYREFPROC 36
#define CLR20_META_TASSEMBLYREFOS 37
#define CLR20_META_TFILE 38
#define CLR20_META_TEXPORTEDTYPE 39
#define CLR20_META_TMANIFESTRES 40
#define CLR20_META_TNESTEDCLASS 41
#define CLR20_META_TGENERICPARAM 42
#define CLR20_META_TMETHODSPEC 43 /* generic method instantiation */
#define CLR20_META_TGENERICPARAMCONST 44 /* generic parameter constraint */
#define CLR20_META_TMETHODREF CLR20_META_TMEMBERREF /* method references are MemberRef rows */
/*
This is the field description structure. It is created at
compile time and is accessed during run-time to determine
the row length so that it can be copied into a more suitable
structure for dynamic access.
*/
uint8_t *WIN_CLR20_META_TFIELDS[64] =
{
[CLR20_META_TMODULE] = "2SGGG",
[CLR20_META_TTYPEREF] = "RSS", /* META_TTYPEREF */
[CLR20_META_TTYPEDEF] = "4SSTFM", /* META_TTYPEDEF */
[CLR20_META_TFIELD] = "2SB", /* META_TFIELD */
[CLR20_META_TMETHODDEF] = "422SBP", /* META_TMETHODDEF */
[CLR20_META_TPARAM] = "22S", /* META_TPARAM */
[CLR20_META_TIFACEIMPL] = "DT", /* META_TIFACEIMPL */
[CLR20_META_TMEMBERREF] = "ESB", /* META_TMEMBERREF */
[CLR20_META_TCONSTANT] = "11HB", /* META_TCONSTANT */
[CLR20_META_TCUSTOMATTRI] = "CIB", /* META_TCUSTOMATTRI */
[CLR20_META_TFIELDMARSHAL] = "OB", /* META_TFIELDMARSHAL */
[CLR20_META_TDECLSECURITY] = "2AB", /* META_TDECLSECURITY */
[CLR20_META_TCLASSLAYOUT] = "24D", /* META_TCLASSLAYOUT */
[CLR20_META_TFIELDLAYOUT] = "4F",
[CLR20_META_TSTANDALONGSIG] = "B",
[CLR20_META_TEVENTMAP] = "DV",
[CLR20_META_TEVENT] = "2ST",
[CLR20_META_TPROPERTYMAP] = "DY",
[CLR20_META_TPROPERTY] = "2SB",
[CLR20_META_TMETHODSEMAN] = "2MS",
[CLR20_META_TMETHODIMPL] = "DII",
[CLR20_META_TMODULEREF] = "S",
[CLR20_META_TTYPESPEC] = "B",
[CLR20_META_TIMPLMAP] = "2USN",
[CLR20_META_TFIELDRVA] = "4F",
[CLR20_META_TASSEMBLY] = "422224BSS",
[CLR20_META_TASSEMBLYPROC] = "4",
[CLR20_META_TASSEMBLYOS] = "444",
[CLR20_META_TASSEMBLYREF] = "22224BSSB",
[CLR20_META_TASSEMBLYREFPROC] = "4J",
[CLR20_META_TASSEMBLYREFOS] = "444J",
[CLR20_META_TFILE] = "4SB",
[CLR20_META_TEXPORTEDTYPE] = "44SSB",
[CLR20_META_TMANIFESTRES] = "44SB",
[CLR20_META_TNESTEDCLASS] = "DD",
[CLR20_META_TGENERICPARAM] = "22KS",
[CLR20_META_TGENERICPARAMCONST]="LT"
};
Reading Meta-Data #~ Stream Tables
A point worth noting: although the code below can handle all the major tables defined by the ECMA-335 standard for the CLI, other, non-standard tables can be present as well. If one of these tables exists then the code below may fail, and measures should be taken to catch this situation. I have included a comment inside the row-reading loop near the bottom of this function which shows where this check needs to be. If such an unknown table sits between known tables then there is no way that I know of to skip it, because you have to know the size of each row to know its total size.
int cilvm_clr_meta_tread(win_clr20_meta *meta)
/*
Processes the CLR meta-data by inflating the table stream
into a binary structure which can be more easily accessed
later.
All of the meta-data is stored in a stream like format. It
is difficult to randomly access the data in this format as
it lacks any block like structure. This function will read
the stream and create a block like structure which can be
easily accessed in a random manner.
*/
{
uint32_t x;
uint32_t y;
uint32_t z;
uint32_t w;
uint32_t v;
uint8_t *tbuf; /* table stream buffer */
uint8_t ver_major; /* major version */
uint8_t ver_minor; /* minor version */
uint8_t fsize[26]; /* field size for each type in bytes */
memset(fsize, 0, sizeof(fsize));
memset(meta->tables, 0, sizeof(meta->tables));
/* We are going to convert the stream into a structure */
tbuf = meta->strmbyid[WIN_CLR20_META_TABLES].data;
/* The first DWORD is supposed to be reserved and zero. */
ASSERT(UINT32_AT(tbuf) == 0);
tbuf += 4;
/* The major and minor version. */
ver_major = UINT8_AT(tbuf + 0);
ver_minor = UINT8_AT(tbuf + 1);
tbuf += 2;
/* http://ntcore.com/files/dotnetformat.htm
The heap offset sizes. This field is very important, it's a byte that
tells us the size that indexes into the "#String", "#GUID" and "#Blob"
streams will have. I paste you the description and the bit mask from
the SDK
Bit mask Description
0x01 Size of “#String” stream >= 2^16.
0x02 Size of “#GUID” stream >= 2^16
0x04 Size of “#Blob” stream >= 2^16.
If zero index are 16 bits and if one index are 32 bits.
*/
meta->heapoffsetsizes = UINT8_AT(tbuf++);
/* Reserved byte which should be zero.. */
/* ASSERT(UINT8_AT(tbuf) == 0); */
++tbuf;
/* http://ntcore.com/files/dotnetformat.htm
It's a bitmask-qword that tells us which MetaData Tables are present in
the assembly. Of course, since this is a qword the maximum number of
tables is 64. However, most of the tables aren't defined yet. So, the
high bits of this qword are always 0.
*/
meta->valid[0] = UINT32_AT(tbuf + 0);
meta->valid[1] = UINT32_AT(tbuf + 4);
tbuf += 8;
printf("TablesValid:%04x%04x\n", meta->valid[0], meta->valid[1]);
/* Also a bitmask-qword. It tells us which tables are sorted. */
meta->sorted[0] = UINT32_AT(tbuf + 0);
meta->sorted[1] = UINT32_AT(tbuf + 4);
tbuf += 8;
/* http://ntcore.com/files/dotnetformat.htm
Following there's an array of dwords with the number of rows for each
present table. Ok this has to be explained. For every table there can
be n rows. Let's say we have three tables: A, B and C. And the Valid
mask tells us that the B table is not present, but A and C are. In
this case there will be 2 dwords (not three), one for the rows in the
A table and one for the C table. The B table rows are skipped since
there is no B table in the assembly.
*/
for(y = 0; y < 2; ++y)
{
for(x = 0; x < 32; ++x)
{
if(((meta->valid[y] >> x) & 0x1) == 0x1)
{
/* This table is present. */
printf("Table[%u] Present With %u Rows\n", x + (y * 32), UINT32_AT(tbuf));
meta->tables[x + (y * 32)].rowcnt = UINT32_AT(tbuf);
tbuf += 4;
}else{
meta->tables[x + (y * 32)].rowcnt = 0;
}
}
}
/* I need to calculate the size of any indexes that are used by
the dynamic field format table defined during compile-time.
Once the size of these indexes are defined then we can continue
by loading the actual data for the meta-data tables.
*/
fsize['1'] = 1;
fsize['2'] = 2;
fsize['4'] = 4;
/* Either a 4 byte or 2 byte index needed depending on bit set. */
fsize['S'] = (meta->heapoffsetsizes & CLR20_META_HEAPOFFSETSIZES_STRING) ? 4 : 2;
fsize['G'] = (meta->heapoffsetsizes & CLR20_META_HEAPOFFSETSIZES_GUID) ? 4 : 2;
fsize['B'] = (meta->heapoffsetsizes & CLR20_META_HEAPOFFSETSIZES_BLOB) ? 4 : 2;
/* We or the row counts and if we have left something larger than
0xffff then this means that one of the tables is needs a 32 bit
index instead of a 16 bit one.
*/
fsize['R'] = (meta->tables[CLR20_META_TMODULE].rowcnt |
meta->tables[CLR20_META_TMODULEREF].rowcnt |
meta->tables[CLR20_META_TASSEMBLYREF].rowcnt |
meta->tables[CLR20_META_TTYPEREF].rowcnt) > 0xffff ? 4 : 2;
fsize['T'] = (meta->tables[CLR20_META_TTYPEREF].rowcnt |
meta->tables[CLR20_META_TTYPEDEF].rowcnt |
meta->tables[CLR20_META_TTYPESPEC].rowcnt) > 0xffff ? 4 : 2;
fsize['F'] = meta->tables[CLR20_META_TFIELD].rowcnt > 0xffff ? 4 : 2;
fsize['M'] = meta->tables[CLR20_META_TMETHODDEF].rowcnt > 0xffff ? 4 : 2;
fsize['P'] = meta->tables[CLR20_META_TPARAM].rowcnt > 0xffff ? 4 : 2;
fsize['D'] = meta->tables[CLR20_META_TTYPEDEF].rowcnt > 0xffff ? 4 : 2;
fsize['E'] = (meta->tables[CLR20_META_TTYPEREF].rowcnt |
meta->tables[CLR20_META_TMODULEREF].rowcnt |
meta->tables[CLR20_META_TMETHODDEF].rowcnt |
meta->tables[CLR20_META_TTYPESPEC].rowcnt |
meta->tables[CLR20_META_TTYPEDEF].rowcnt) > 0xffff ? 4 : 2;
fsize['H'] = (meta->tables[CLR20_META_TPARAM].rowcnt |
meta->tables[CLR20_META_TFIELD].rowcnt |
meta->tables[CLR20_META_TPROPERTY].rowcnt) > 0xffff ? 4 : 2;
/* A bit of a special case here. It can reference any table but it's self. */
for(x = 0, y = 0; x < 64; ++x)
{
if(x != CLR20_META_TCUSTOMATTRI)
{
y = y | meta->tables[x].rowcnt;
}
}
fsize['C'] = y > 0xffff ? 4 : 2;
/* Oddly, the document says there is a MethodRef table? Where? If there
does exist one then it should be considered for 'K' here. Since at
the moment I have no idea what to do but just ignore it.
*/
fsize['I'] = (meta->tables[CLR20_META_TMETHODDEF].rowcnt |
meta->tables[CLR20_META_TMETHODREF].rowcnt) > 0xffff ? 4 : 2;
fsize['O'] = (meta->tables[CLR20_META_TFIELD].rowcnt |
meta->tables[CLR20_META_TPARAM].rowcnt) > 0xffff ? 4 : 2;
fsize['A'] = (meta->tables[CLR20_META_TTYPEDEF].rowcnt |
meta->tables[CLR20_META_TMETHODDEF].rowcnt |
meta->tables[CLR20_META_TASSEMBLY].rowcnt) > 0xffff ? 4 : 2;
fsize['V'] = meta->tables[CLR20_META_TEVENT].rowcnt > 0xffff ? 4 : 2;
fsize['Y'] = meta->tables[CLR20_META_TPROPERTY].rowcnt > 0xffff ? 4 : 2;
fsize['S'] = (meta->tables[CLR20_META_TEVENT].rowcnt |
meta->tables[CLR20_META_TPROPERTY].rowcnt) > 0xffff ? 4 : 2;
fsize['U'] = (meta->tables[CLR20_META_TFIELD].rowcnt |
meta->tables[CLR20_META_TMETHODDEF].rowcnt) > 0xffff ? 4 : 2;
fsize['B'] = (meta->tables[CLR20_META_TFILE].rowcnt |
meta->tables[CLR20_META_TASSEMBLYREF].rowcnt |
meta->tables[CLR20_META_TEXPORTEDTYPE].rowcnt) > 0xffff ? 4 : 2;
fsize['X'] = (meta->tables[CLR20_META_TMETHODDEF].rowcnt |
meta->tables[CLR20_META_TMETHODREF].rowcnt) > 0xffff ? 4 : 2;
fsize['W'] = (meta->tables[CLR20_META_TMODULE].rowcnt |
meta->tables[CLR20_META_TMODULEREF].rowcnt |
meta->tables[CLR20_META_TASSEMBLYREF].rowcnt |
meta->tables[CLR20_META_TTYPEREF].rowcnt) > 0xffff ? 4 : 2;
fsize['N'] = meta->tables[CLR20_META_TMODULEREF].rowcnt > 0xffff ? 4 : 2;
fsize['J'] = meta->tables[CLR20_META_TASSEMBLYREF].rowcnt > 0xffff ? 4 : 2;
fsize['K'] = (meta->tables[CLR20_META_TTYPEDEF].rowcnt |
meta->tables[CLR20_META_TMETHODDEF].rowcnt) > 0xffff ? 4 : 2;
fsize['L'] = meta->tables[CLR20_META_TGENERICPARAM].rowcnt > 0xffff ? 4 : 2;
/* http://ntcore.com/files/dotnetformat.htm
As you can see, some numbers are missing, that's because some tables,
as I said before, are not defined yet. It's important you understand
how the tables are stored. A table is made of an array of rows; a row
is a structure (let's call it this way for the moment to make things
easier). After the rows of a given table end, the rows of the next
table follow. The problem with a row (remember, think of it like a
structure) is that some of its fields aren't always of the same size
and they change from assembly to assembly, so you have to calculate
them dynamically.
*/
for(y = 0; y < 2; ++y)
{
for(x = 0; x < 32; ++x)
{
if(((meta->valid[y] >> x) & 0x1) == 0x1)
{
/* the table identifier */
w = x + (y * 32);
/*
Here is where a check needs
to be made to ensure that
this is not a unknown table.
A unknown table will have no
row field string so you might
also get a crash here...
*/
/* calculate the size of each row */
for(z = 0; WIN_CLR20_META_TFIELDS[w][z] != 0; ++z);
meta->tables[w].fsize = (uint8_t*)malloc(z);
for(v = 0, z = 0; WIN_CLR20_META_TFIELDS[w][z] != 0; ++z)
{
v += fsize[WIN_CLR20_META_TFIELDS[w][z]];
meta->tables[w].fsize[z] = fsize[WIN_CLR20_META_TFIELDS[w][z]];
}
meta->tables[w].fcnt = z;
/* read the number of rows specified by row size */
meta->tables[w].rsize = v;
meta->tables[w].rows = (uint8_t*)malloc(meta->tables[w].rowcnt * v);
memcpy(meta->tables[w].rows, tbuf, meta->tables[w].rowcnt * v);
tbuf += meta->tables[w].rowcnt * v;
printf("read %u rows of %u bytes each for a total of %u.\n",
meta->tables[w].rowcnt, v, meta->tables[w].rowcnt * v);
}
}
}
return 1;
}
The tables have now been read into their own structure. The data for all the rows is pointed to by meta->tables[w].rows.
Meta-Data Method Definition Table Field Indexes
/*
    ECMA-335 4th Edition Section 22
    Table Field Indexes
    These are the actual index of each
    field in the row for each table denoted.
    Although, each field has a certain size
    and most index type fields can actually
    vary in size from one module to the next.
    This normally serves as a constant to pass
    to the actual helper function which can
    access fields in a specified table and row.
    So now you have a textual name to reference
    instead of having to remember arbitrary index.

    The index counts the characters of the field
    description string of the table, so every
    character needs its own index. That includes
    the Flags field of Assembly and AssemblyRef
    and the padding byte of Constant ("11HB").
*/
#define CLR20_TASSEMBLY_HASHALGID 0
#define CLR20_TASSEMBLY_VERMAJOR 1
#define CLR20_TASSEMBLY_VERMINOR 2
#define CLR20_TASSEMBLY_BUILDNUM 3
#define CLR20_TASSEMBLY_REVISION 4
#define CLR20_TASSEMBLY_FLAGS 5
#define CLR20_TASSEMBLY_PUBLICKEY 6
#define CLR20_TASSEMBLY_NAME 7
#define CLR20_TASSEMBLY_CULTURE 8
#define CLR20_TASSEMBLYOS_OSPLATFORMID 0
#define CLR20_TASSEMBLYOS_OSMAJORVERSION 1
#define CLR20_TASSEMBLYOS_OSMINORVERSION 2
#define CLR20_TASSEMBLYPROC_PROCESSOR 0
#define CLR20_TASSEMBLYREF_VERMAJOR 0
#define CLR20_TASSEMBLYREF_VERMINOR 1
#define CLR20_TASSEMBLYREF_BUILDNUM 2
#define CLR20_TASSEMBLYREF_REVISION 3
#define CLR20_TASSEMBLYREF_FLAGS 4
#define CLR20_TASSEMBLYREF_PUBLICKEYORTOKEN 5
#define CLR20_TASSEMBLYREF_NAME 6
#define CLR20_TASSEMBLYREF_CULTURE 7
#define CLR20_TASSEMBLYREF_HASHVALUE 8
#define CLR20_TASSEMBLYREFOS_OSPLATFORMID 0
#define CLR20_TASSEMBLYREFOS_OSVERMAJOR 1
#define CLR20_TASSEMBLYREFOS_OSVERMINOR 2
#define CLR20_TASSEMBLYREFOS_ASSEMBLYREF 3
#define CLR20_TASSEMBLYREFPROC_PROCESSOR 0
#define CLR20_TASSEMBLYREFPROC_ASSEMBLYREF 1
#define CLR20_TCLASSLAYOUT_PACKINGSIZE 0
#define CLR20_TCLASSLAYOUT_CLASSSIZE 1
#define CLR20_TCLASSLAYOUT_PARENT 2
#define CLR20_TCONSTANT_TYPE 0
#define CLR20_TCONSTANT_PADDING 1 /* one byte which follows the type */
#define CLR20_TCONSTANT_PARENT 2
#define CLR20_TCONSTANT_VALUE 3
#define CLR20_TCUSTOMATTRI_PARENT 0
#define CLR20_TCUSTOMATTRI_TYPE 1
#define CLR20_TCUSTOMATTRI_VALUE 2
#define CLR20_TDECLSECURITY_ACTION 0
#define CLR20_TDECLSECURITY_PARENT 1
#define CLR20_TDECLSECURITY_PERMISSIONSET 2
#define CLR20_TEVENT_EVENTFLAGS 0
#define CLR20_TEVENT_NAME 1
#define CLR20_TEVENT_EVENTTYPE 2
#define CLR20_TEVENTMAP_PARENT 0
#define CLR20_TEVENTMAP_EVENTLIST 1
#define CLR20_TEXPORTEDTYPE_FLAGS 0
#define CLR20_TEXPORTEDTYPE_TYPEDEFID 1
#define CLR20_TEXPORTEDTYPE_TYPENAME 2
#define CLR20_TEXPORTEDTYPE_TYPENAMESPACE 3
#define CLR20_TEXPORTEDTYPE_IMPLEMENTATION 4
#define CLR20_TFIELD_FLAGS 0
#define CLR20_TFIELD_NAME 1
#define CLR20_TFIELD_SIGNATURE 2
#define CLR20_TFIELDLAYOUT_OFFSET 0
#define CLR20_TFIELDLAYOUT_FIELD 1
#define CLR20_TFIELDMARSHAL_PARENT 0
#define CLR20_TFIELDMARSHAL_NATIVETYPE 1
#define CLR20_TFIELDRVA_RVA 0
#define CLR20_TFIELDRVA_FIELD 1
#define CLR20_TFILE_FLAGS 0
#define CLR20_TFILE_NAME 1
#define CLR20_TFILE_HASHVALUE 2
#define CLR20_TGENERICPARAM_NUMBER 0
#define CLR20_TGENERICPARAM_FLAGS 1
#define CLR20_TGENERICPARAM_OWNER 2
#define CLR20_TGENERICPARAM_NAME 3
#define CLR20_TGENERICPARAMCONST_OWNER 0
#define CLR20_TGENERICPARAMCONST_CONSTRAINT 1
#define CLR20_TIMPLMAP_MAPPINGFLAGS 0
#define CLR20_TIMPLMAP_MEMBERFORWARDED 1
#define CLR20_TIMPLMAP_IMPORTNAME 2
#define CLR20_TIMPLMAP_IMPORTSCOPE 3
#define CLR20_TIFACEIMPL_CLASS 0
#define CLR20_TIFACEIMPL_INTERFACE 1
#define CLR20_TMANIFESTRES_OFFSET 0
#define CLR20_TMANIFESTRES_FLAGS 1
#define CLR20_TMANIFESTRES_NAME 2
#define CLR20_TMANIFESTRES_IMPLEMENTATION 3
#define CLR20_TMEMBERREF_CLASS 0
#define CLR20_TMEMBERREF_NAME 1
#define CLR20_TMEMBERREF_SIGNATURE 2
#define CLR20_TMETHODDEF_RVA 0
#define CLR20_TMETHODDEF_IMPLFLAGS 1
#define CLR20_TMETHODDEF_FLAGS 2
#define CLR20_TMETHODDEF_NAME 3
#define CLR20_TMETHODDEF_SIGNATURE 4
#define CLR20_TMETHODDEF_PARAMLIST 5
#define CLR20_TMETHODIMPL_CLASS 0
#define CLR20_TMETHODIMPL_METHODBODY 1
#define CLR20_TMETHODIMPL_METHODDECLARATION 2
#define CLR20_TMETHODSEMAN_SEMANTICS 0
#define CLR20_TMETHODSEMAN_METHOD 1
#define CLR20_TMETHODSEMAN_ASSOCIATION 2
#define CLR20_TMETHODSPEC_METHOD 0
#define CLR20_TMETHODSPEC_INSTANTIATION 1
#define CLR20_TMODULE_GENERATION 0
#define CLR20_TMODULE_NAME 1
#define CLR20_TMODULE_MVID 2
#define CLR20_TMODULE_ENCID 3
#define CLR20_TMODULE_ENCBASEID 4
#define CLR20_TMODULEREF_NAME 0
#define CLR20_TNESTEDCLASS_NESTEDCLASS 0
#define CLR20_TNESTEDCLASS_ENCLOSINGCLASS 1
#define CLR20_TPARAM_FLAGS 0
#define CLR20_TPARAM_SEQUENCE 1
#define CLR20_TPARAM_NAME 2
#define CLR20_TPROPERTY_FLAGS 0
#define CLR20_TPROPERTY_NAME 1
#define CLR20_TPROPERTY_TYPE 2
#define CLR20_TPROPERTYMAP_PARENT 0
#define CLR20_TPROPERTYMAP_PROPERTYLIST 1
#define CLR20_TSTANDALONGSIG_SIGNATURE 0
#define CLR20_TTYPEDEF_FLAGS 0
#define CLR20_TTYPEDEF_TYPENAME 1
#define CLR20_TTYPEDEF_TYPENAMESPACE 2
#define CLR20_TTYPEDEF_EXTENDS 3
#define CLR20_TTYPEDEF_FIELDLIST 4
#define CLR20_TTYPEDEF_METHODLIST 5
#define CLR20_TTYPEREF_RESOLUTIONSCOPE 0
#define CLR20_TTYPEREF_TYPENAME 1
#define CLR20_TTYPEREF_TYPENAMESPACE 2
#define CLR20_TTYPESPEC_SIGNATURE 0
Displaying The Method Definition Table
Now that you have located the PE32 headers which allowed you to find the CLR run-time header which you then used to read the meta-data streams and finally the meta-data tables from the #~ stream you can now display the method definition table.
I wrote a utility function to access a row and field in each table. The function works like below:
win_clr20_meta_table *tmp;
tmp = &meta->tables[CLR20_META_TMETHODDEF];
for(x = 0; x < tmp->rowcnt; ++x)
{
    /*
        cilvm_clr_treadrowfield copies only as many bytes as the
        field is wide, which is two for most of these fields. So z
        (a 32 bit variable) is cleared before every read, otherwise
        its upper bytes would still hold part of the previous value.
        This relies on a little endian machine, just like the rest
        of the code.
    */
    z = 0;
    cilvm_clr_treadrowfield(tmp, x, CLR20_TMETHODDEF_RVA, &z);
    printf("rva:%x ", z);
    z = 0;
    cilvm_clr_treadrowfield(tmp, x, CLR20_TMETHODDEF_IMPLFLAGS, &z);
    printf("implflags:%x ", z);
    z = 0;
    cilvm_clr_treadrowfield(tmp, x, CLR20_TMETHODDEF_FLAGS, &z);
    printf("flags:%x ", z);
    z = 0;
    cilvm_clr_treadrowfield(tmp, x, CLR20_TMETHODDEF_NAME, &z);
    printf("Name:%x ", z);
    z = 0;
    cilvm_clr_treadrowfield(tmp, x, CLR20_TMETHODDEF_SIGNATURE, &z);
    printf("Signature:%x ", z);
    z = 0;
    cilvm_clr_treadrowfield(tmp, x, CLR20_TMETHODDEF_PARAMLIST, &z);
    printf("ParamList:%x ", z);
    printf("\n");
}
The function itself looks like this:
/*
    Copy one field of one row out of a meta-data table.

    table - table to read from.
    rndx  - row index, starting from zero.
    fndx  - field index within the row, starting from zero.
    ret   - destination buffer. Exactly table->fsize[fndx] bytes are copied
            into it, byte for byte as they appear in the row. Any bytes of
            the destination beyond that width are left untouched, so a caller
            reading into a wider integer should zero it first.

    Returns non-zero on success. Returns zero, without writing to ret, if
    table or ret is a null pointer or if rndx or fndx is past the last row or
    last field. A negative index converted to uint32_t is rejected the same
    way.
*/
int cilvm_clr_treadrowfield(win_clr20_meta_table *table, uint32_t rndx, uint32_t fndx, void *ret)
{
    /* same width as fndx so the counter cannot wrap before reaching it */
    uint32_t f;
    uint8_t *o;

    if(!table || !ret)
    {
        return 0;
    }
    if(rndx >= table->rowcnt || fndx >= table->fcnt)
    {
        return 0;
    }
    /* start of the requested row */
    o = table->rows + (table->rsize * rndx);
    /* skip over the fields that come before the requested one */
    for(f = 0; f < fndx; ++f)
    {
        o += table->fsize[f];
    }
    memcpy(ret, o, table->fsize[fndx]);
    return 1;
}
Display Name And Signature For Each Method Definition
You can also easily display each method definition's name and signature, and find the chunk (section) that holds the method by resolving the method's RVA to a pointer inside that chunk.
uint32_t x;
uint32_t y;
uint32_t z;
uint32_t siglen;
uint32_t sighdr;
win_clr20_meta_table *tmp;
uint8_t *mptr;

/*
    For every row of the method definition table print the method name, the
    raw signature bytes, and whether the chunk holding the method body could
    be located from its RVA.

    z is cleared before each read because cilvm_clr_treadrowfield() copies
    only as many bytes as the field is wide; without this a narrow heap index
    would keep stale (or uninitialised) upper bytes and point outside the
    heap.
    NOTE(review): reading into a zeroed uint32_t assumes a little-endian
    host - confirm.
*/
tmp = &clrmodule->meta.tables[CLR20_META_TMETHODDEF];
for(x = 0; x < tmp->rowcnt; ++x)
{
    /* find the name */
    z = 0;
    cilvm_clr_treadrowfield(tmp, x, CLR20_TMETHODDEF_NAME, &z);
    mptr = clrmodule->meta.strmbyid[WIN_CLR20_META_STRINGS].data;
    printf("%s\n", (const char*)&mptr[z]);
    /* find the signature */
    z = 0;
    cilvm_clr_treadrowfield(tmp, x, CLR20_TMETHODDEF_SIGNATURE, &z);
    mptr = clrmodule->meta.strmbyid[WIN_CLR20_META_BLOB].data;
    /*
        A blob starts with its length in compressed form (ECMA-335 II.24.2.4):
        0bbbbbbb               - one byte, length in the low 7 bits
        10bbbbbb x             - two bytes, big-endian, 14-bit length
        110bbbbb x y z         - four bytes, big-endian, 29-bit length
    */
    if((mptr[z] & 0x80) == 0x00)
    {
        siglen = mptr[z];
        sighdr = 1;
    }else if((mptr[z] & 0xC0) == 0x80){
        siglen = ((uint32_t)(mptr[z] & 0x3F) << 8) | mptr[z + 1];
        sighdr = 2;
    }else{
        siglen = ((uint32_t)(mptr[z] & 0x1F) << 24) |
                 ((uint32_t)mptr[z + 1] << 16) |
                 ((uint32_t)mptr[z + 2] << 8) |
                 mptr[z + 3];
        sighdr = 4;
    }
    printf("siglen:%x\nsig:", (unsigned)siglen);
    for(y = 0; y < siglen; ++y)
    {
        printf("%02x-", mptr[z + sighdr + y]);
    }
    printf("\n");
    /* find the chunk that holds the method */
    z = 0;
    cilvm_clr_treadrowfield(tmp, x, CLR20_TMETHODDEF_RVA, &z);
    if(cilvm_clr_getchunkptrbyrva(clrmodule, z, (void**)&mptr))
    {
        printf("found chunk holding method specified by rva\n");
    }else{
        printf("failed to find chunk holding rva specified by method.\n");
    }
}
Display Type Names And Namespaces With Fields
/*
    Print every type definition as ".type <namespace>.<name>", followed by
    the index of its first field and one ".field[<row>] <name>" line for each
    field the type owns.

    clrmodule - loaded module whose TypeDef and Field tables and #Strings
                stream are read.

    Returns 1 when every table read succeeded, 0 as soon as one fails.

    NOTE(review): heap and table indices are read into zeroed uint32_t
    variables, which yields the right value only on a little-endian host -
    confirm for other targets.
*/
int test2(win_clr20_module *clrmodule)
{
    win_clr20_meta_table *mtp_typedef;
    win_clr20_meta_table *mtp_field;
    uint8_t *clrm_strings;
    uint32_t x;
    uint32_t y;
    uint32_t z;
    uint32_t tdef_typename;
    uint32_t tdef_typenamespace;
    uint32_t tdef_fieldlist;
    uint32_t tfield_name;

    clrm_strings = clrmodule->meta.strmbyid[WIN_CLR20_META_STRINGS].data;
    mtp_typedef = &clrmodule->meta.tables[CLR20_META_TTYPEDEF];
    mtp_field = &clrmodule->meta.tables[CLR20_META_TFIELD];

    for(x = 0; x < mtp_typedef->rowcnt; ++x)
    {
        /*
            Clear the destinations first: only the field's own width is
            copied, so narrow indices would otherwise keep stale upper bytes.
        */
        tdef_typename = 0;
        tdef_typenamespace = 0;
        tdef_fieldlist = 0;
        if(!cilvm_clr_treadrowfield(mtp_typedef, x, CLR20_TTYPEDEF_TYPENAME, &tdef_typename) ||
           !cilvm_clr_treadrowfield(mtp_typedef, x, CLR20_TTYPEDEF_TYPENAMESPACE, &tdef_typenamespace) ||
           !cilvm_clr_treadrowfield(mtp_typedef, x, CLR20_TTYPEDEF_FIELDLIST, &tdef_fieldlist))
        {
            return 0;
        }
        printf(".type %s.%s\n",
               (const char*)&clrm_strings[tdef_typenamespace],
               (const char*)&clrm_strings[tdef_typename]);

        /* figure out where the fields stop for this one */
        if((x + 1) < mtp_typedef->rowcnt)
        {
            /*
                According to ECMA-335-4th-S22.37, the field list for this type
                stops where the field list for the next type def starts, so
                look ahead one..
            */
            z = 0;
            if(!cilvm_clr_treadrowfield(mtp_typedef, x + 1, CLR20_TTYPEDEF_FIELDLIST, &z))
            {
                return 0;
            }
        }else{
            /* This is the last type definition. So read until last row. */
            z = mtp_field->rowcnt + 1;
        }
        /* never walk past the end of the field table on a malformed file */
        if(z > mtp_field->rowcnt + 1)
        {
            z = mtp_field->rowcnt + 1;
        }

        /* read the fields for this type */
        printf("tdef_fieldlist:%x\n", (unsigned)tdef_fieldlist);
        /*
            Field list values are one-based, rows are zero-based. A value of
            zero on either side would wrap around when decremented, so it is
            treated as "no fields".
        */
        if(tdef_fieldlist == 0 || z == 0)
        {
            continue;
        }
        for(y = tdef_fieldlist - 1; y < (z - 1); ++y)
        {
            tfield_name = 0;
            if(!cilvm_clr_treadrowfield(mtp_field, y, CLR20_TFIELD_NAME, &tfield_name))
            {
                return 0;
            }
            printf(".field[%u] %s\n", (unsigned)y, (const char*)&clrm_strings[tfield_name]);
        }
    }
    return 1;
}