Mono源码学习-如何解析config文件

　　起因

　　为什么要选择解析config文件,主要是mono源码比较庞大且复杂.先从一个模块开始读源码.再者就是对用c语言如何解析xml文件,还是充满好奇的.可能是因为一直用c#的原因,因为.Net Framework 提供xpath/xml linq等方式解析,使用方便,也从来没有想过了解其中的原理.使用的时候也是针对业务的.所以这里带着这份好奇,开始了.

　　本文中涉及到的config文件,默认存放在etc/mono目录下.与其说是mono解析config文件,不如说是用eglib来解析简单类型的xml文件.

　　函数调用流程

　　//--driver.c

　　mono_main //mono主函数

　　//---mono-config.c

　　mono_config_parse

　　mono_config_parse_file

　　mono_config_parse_file_with_context

　　g_file_get_contents //获取config文件内容和文件大小

　　mono_config_parse_xml_with_context //真正解析xml

　　mono_config_init //创建一个config_handlers哈希表

　　g_markup_parse_context_new

　　g_markup_parse_context_parse //按字符解析xml

　　g_markup_parse_context_end_parse

　　g_markup_parse_context_free //解析xml结束

　　先从mono_main函数开始,源码函数行数较多,只保留跟解析config相关代码.

　　int mono_main (int argc, char* argv[])

　　{

　　//只保留,解析config文件相关变量

　　//...

　　char *config_file=NULL;

　　//....

　　for (i=1; i < argc; ++i)

　　{

　　if (strcmp(argv[i], "--config")==0) { //在运行时,获取--config对应的

　　if (i + 1 >=argc) {

　　fprintf(stderr, "error: --config requires a filename argument

　　");

　　return 1;

　　}

　　config_file=argv[++i]; //如果自考证书不指定--config选项,config_file为NULL

　　}

　　//...

　　mono_set_rootdir (); //设置etc和lib目录

　　//....

　　/* Parse gac loading options before loading assemblies. */

　　if (mono_compile_aot || action==DO_EXEC || action==DO_DEBUGGER) {

　　mono_config_parse (config_file); //调用,由于没有指定--config选项,这里为NULL

　　}

　　//...

　　}

　　mono_config_parse函数:

　　//解析config文件

　　void mono_config_parse (const char *filename) {

　　const char *home;

　　char *mono_cfg;

　　#ifndef TARGET_WIN32

　　char *user_cfg;

　　#endif

　　if (filename) { //由于filename为NULL,不为真

　　mono_config_parse_file (filename);

　　return;

　　}

　　//获取环境变量MONO_CONFIG

　　char *env_home=g_getenv ("MONO_CONFIG");

　　if (env_home) {

　　mono_config_parse_file (env_home);

　　return;

　　}

　　//在mono_main函数,mono_set_rootdir已经设置过mono的etc和lib目录

　　//mono_get_config_dir函数主要是获取mono_cfg_dir全局变量的值,mono_cfg_dir存放的是etc目录路径

　　//拼接具体config所在的路径

　　mono_cfg=g_build_filename (mono_get_config_dir (), "mono", "config", NULL);

　　mono_config_parse_file (mono_cfg); //解析config文件

　　g_free (mono_cfg);

　　#if !defined(TARGET_WIN32)

　　home=g_get_home_dir ();

　　user_cfg=g_strconcat (home, G_DIR_SEPARATOR_S, ".mono/config", NULL);

　　mono_config_parse_file (user_cfg);

　　g_free (user_cfg);

　　#endif

　　}先不上代码,先看一下图

　　mono加载config文件到内存,然后创建parse context,开始按字符解析xml

　　mono_config_parse_file源码

　　static void mono_config_parse_file (const char *filename)

　　{

　　ParseState state={NULL}; //初始化ParseState

　　state.user_data=(gpointer) filename; //user_data存放config所在路径

　　mono_config_parse_file_with_context (&state, filename); //读取config文件到内存上,并开始按字符解析

　　}mono_config_parse_file_with_context源码

　　/* If assembly is NULL, parse in the global context */

　　static int mono_config_parse_file_with_context (ParseState *state, const char *filename)

　　{

　　gchar *text;

　　gsize len;

　　gint offset;

　　mono_trace (G_LOG_LEVEL_INFO, MONO_TRACE_CONFIG,

　　"Config attempting to parse: '%s'.", filename);

　　//在函数内容对text进行分配内存空间,获取config文件大小和内容

　　if (!g_file_get_contents (filename, &text, &len, NULL))

　　return 0;

　　offset=0;

　　if (len > 3 && text [0]=='\xef' && text [1]==(gchar) '\xbb' && text [2]=='\xbf')

　　offset=3; /* Skip UTF-8 BOM */

　　if (state->user_data==NULL) //在一次判读user_data是否为空,为空,就保存filename的地址

　　state->user_data=(gpointer) filename;

　　//***重点***

　　mono_config_parse_xml_with_context (state, text + offset, len - offset);

　　g_free (text); //释放text指向的内存空间

　　return 1;

　　}g_file_get_contents代码较多,我简单用readfile函数进行替代,更容易理解

  #define _CRT_SECURE_NO_WARNINGS

  #include <assert.h>
  #include <limits.h>
  #include <stdio.h>
  #include <stdlib.h>

　　* filename 要读取文件的名称

　　* content char类型二级指针,存放要读取文件的内容,返回

　　* len int类型指针,存放文件的长度,返回

　　int readfile(char* filename, char** content, int* len)

　　{

　　assert(filename !=NULL);

　　FILE* pfile=fopen(filename, "r");

　　assert(pfile !=NULL);

　　fseek(pfile, 0, SEEK_END); //1. 将文件指针移动到文件尾部

　　int total=ftell(pfile); //2. 获取文件的大小

　　fseek(pfile, 0, SEEK_SET); //3. 在将文件指针移动到文件头部,方便将文件读取到字符串

　　char* str=calloc(1, total + 1); //4. 根据文件大小进行分配空间

　　int result=fread(str, total + 1, sizeof(char), pfile); //5. 读取文件内容到str中

　　fclose(pfile);

　　str[total]='\0';

　　*content=str;

　　*len=total;

　　return 0;

　　}

　　int main(int argc, char* argv[])

　　{

　　char* filename="config";

　　char* content; //存放读取文件的内容

　　int len; //存放文件的长度

　　readfile(filename, &content, &len);

　　printf("%s

　　", content);

　　printf("--------------------------

　　");

　　printf("%d

　　", len);

　　return 0;

　　}

　　其实g_file_get_contents函数代码不是很多,只有50行左右,只是展示不是很方便,更多的篇幅留给g_markup_parse_context_parse函数.

　　g_markup_parse_context_parse函数

　　因为这个一看结构较多,还有回调函数也很多.简单用图说一下

　　mono解析xml,转换器

　　gboolean

　　g_markup_parse_context_parse (GMarkupParseContext *context,

　　const gchar *text, gssize text_len,

　　GError **gerror)

　　{

　　const char *p, *end;

　　end=text + text_len; //根据文件大小,得出文件末尾的位置,方便下边循环有终止条件

　　//逐个字符遍历,进行提取

　　for (p=text; p < end; p++){

　　char c=*p;

　　switch (context->state){

　　case START:

　　if (c==' ' || c==' ' || c=='\f' || c=='

　　' || (c & 0x80))

　　continue;

　　if (c=='<'){

　　if (p+1 < end && p [1]=='?'){

　　context->state=SKIP_XML_DECLARATION;

　　p++;

　　} else

　　context->state=START_ELEMENT;

　　continue;

　　}

　　set_error ("%s", "Expected < to start the document");

　　goto fail;

　　case SKIP_XML_DECLARATION:

　　case START_ELEMENT: {

　　const char *element_start=p, *element_end;

　　char *ename=NULL;

　　int full_stop=0, l;

　　gchar **names=NULL, **values=NULL;

　　for (; p < end && my_isspace (*p); p++)

　　;

　　if (p==end){

　　set_error ("%s", "Unfinished element");

　　goto fail;

　　}

　　if (*p=='!' && (p+2 < end) && (p [1]=='-') && (p [2]=='-')){

　　context->state=COMMENT;

　　p +=2;

　　break;

　　}

　　if (!my_isnamestartchar (*p)){

　　set_error ("%s", "Expected an element name");

　　goto fail;

　　}

　　for (++p; p < end && my_isnamechar (*p); p++)

　　;

　　if (p==end){

　　set_error ("%s", "Expected an element");

　　goto fail;

　　}

　　element_end=p;

　　for (; p < end && my_isspace (*p); p++)

　　;

　　if (p==end){

　　set_error ("%s", "Unfinished element");

　　goto fail;

　　}

　　p=parse_attributes (p, end, &names, &values, gerror, &full_stop, context->state);

　　if (p==end){

　　if (names !=NULL) {

　　g_strfreev (names);

　　g_strfreev (values);

　　}

　　/* Only set the error if parse_attributes did not */

　　if (gerror !=NULL && *gerror==NULL)

　　set_error ("%s", "Unfinished sequence");

　　goto fail;

　　}

　　l=(int)(element_end - element_start);

　　ename=g_malloc (l + 1);

　　if (ename==NULL)

　　goto fail;

　　strncpy (ename, element_start, l);

　　ename [l]=0;

　　if (context->state==START_ELEMENT)

　　if (context->parser.start_element !=NULL)

　　context->parser.start_element (context, ename,

　　(const gchar **) names,

　　(const gchar **) values,

　　context->user_data, gerror);

　　if (names !=NULL){

　　g_strfreev (names);

　　g_strfreev (values);

　　}

　　if (gerror !=NULL && *gerror !=NULL){

　　g_free (ename);

　　goto fail;

　　}

　　if (full_stop){

　　if (context->parser.end_element !=NULL && context->state==START_ELEMENT){

　　context->parser.end_element (context, ename, context->user_data, gerror);

　　if (gerror !=NULL && *gerror !=NULL){

　　g_free (ename);

　　goto fail;

　　}

　　g_free (ename);

　　} else {

　　context->level=g_slist_prepend (context->level, ename);

　　}

　　context->state=TEXT;

　　break;

　　} /* case START_ELEMENT */

　　case TEXT: {

　　if (c=='<'){

　　context->state=FLUSH_TEXT;

　　break;

　　}

　　if (context->parser.text !=NULL){

　　if (context->text==NULL)

　　context->text=g_string_new ("");

　　g_string_append_c (context->text, c);

　　}

　　break;

　　}

　　case COMMENT:

　　if (*p !='-')

　　break;

　　if (p+2 < end && (p [1]=='-') && (p [2]=='>')){

　　context->state=TEXT;

　　p +=2;

　　break;

　　}

　　break;

　　case FLUSH_TEXT:

　　if (context->parser.text !=NULL && context->text !=NULL){

　　context->parser.text (context, context->text->str, context->text->len,

　　context->user_data, gerror);

　　if (gerror !=NULL && *gerror !=NULL)

　　goto fail;

　　}

　　if (c=='/')

　　context->state=CLOSING_ELEMENT;

　　else {

　　p--;

　　context->state=START_ELEMENT;

　　}

　　break;

　　case CLOSING_ELEMENT: {

　　GSList *current=context->level;

　　char *text;

　　if (context->level==NULL){

　　set_error ("%s", "Too many closing tags, not enough open tags");

　　goto fail;

　　}

　　text=current->data;

　　if (context->parser.end_element !=NULL){

　　context->parser.end_element (context, text, context->user_data, gerror);

　　if (gerror !=NULL && *gerror !=NULL){

　　g_free (text);

　　goto fail;

　　}

　　g_free (text);

　　while (p < end && *p !='>')

　　p++;

　　context->level=context->level->next;

　　g_slist_free_1 (current);

　　context->state=TEXT;

　　break;

　　} /* case CLOSING_ELEMENT */

　　} /* switch */

　　}

　　return TRUE;

　　fail:

　　if (context->parser.error && gerror !=NULL && *gerror)

　　context->parser.error (context, *gerror, context->user_data);

　　destroy_parse_state (context);

　　return FALSE;

　　}

　　本文大致梳理了一下整体思路;其中涉及到的结构体、函数指针以及多级指针没有展开讲,准备单独来讲.

本站仅提供存储服务，所有内容均由用户发布，如发现有害或侵权内容，请点击举报。