PostgreSQL initdb Source Code Analysis (Part 17)

Introduction:

Continuing the analysis, this installment looks at:

    setup_collation()

Expanded, the function is:

/*
 * populate pg_collation
 */
static void
setup_collation(void)
{
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
    int            i;
    FILE       *locale_a_handle;
    char        localebuf[NAMEDATALEN];
    int            count = 0;

    PG_CMD_DECL;
#endif

    fputs(_("creating collations ... "), stdout);
    fflush(stdout);

#if defined(HAVE_LOCALE_T) && !defined(WIN32)
    snprintf(cmd, sizeof(cmd),
             "\"%s\" %s template1 >%s",
             backend_exec, backend_options,
             DEVNULL);

    locale_a_handle = popen_check("locale -a", "r");
    if (!locale_a_handle)
        return;                    /* complaint already printed */

    PG_CMD_OPEN;

    PG_CMD_PUTS("CREATE TEMP TABLE tmp_pg_collation ( "
                "    collname name, "
                "    locale name, "
                "    encoding int) WITHOUT OIDS;\n");

    while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
    {
        size_t        len;
        int            enc;
        bool        skip;
        char       *quoted_locale;
        char        alias[NAMEDATALEN];

        len = strlen(localebuf);

        if (len == 0 || localebuf[len - 1] != '\n')
        {
            if (debug)
                fprintf(stderr, _("%s: locale name too long, skipped: %s\n"),
                        progname, localebuf);
            continue;
        }
        localebuf[len - 1] = '\0';

        /*
         * Some systems have locale names that don't consist entirely of ASCII
         * letters (such as "bokmål" or "français").  This is
         * pretty silly, since we need the locale itself to interpret the
         * non-ASCII characters. We can't do much with those, so we filter
         * them out.
         */
        skip = false;
        for (i = 0; i < len; i++)
        {
            if (IS_HIGHBIT_SET(localebuf[i]))
            {
                skip = true;
                break;
            }
        }
        if (skip)
        {
            if (debug)
                fprintf(stderr, _("%s: locale name has non-ASCII characters, skipped: %s\n"),
                        progname, localebuf);
            continue;
        }

        enc = pg_get_encoding_from_locale(localebuf, debug);
        if (enc < 0)
        {
            /* error message printed by pg_get_encoding_from_locale() */
            continue;
        }
        if (!PG_VALID_BE_ENCODING(enc))
            continue;            /* ignore locales for client-only encodings */
        if (enc == PG_SQL_ASCII)
            continue;            /* C/POSIX are already in the catalog */

        count++;

        quoted_locale = escape_quotes(localebuf);

        PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n",
                       quoted_locale, quoted_locale, enc);

        /*
         * Generate aliases such as "en_US" in addition to "en_US.utf8" for
         * ease of use.  Note that collation names are unique per encoding
         * only, so this doesn't clash with "en_US" for LATIN1, say.
         */
        if (normalize_locale_name(alias, localebuf))
            PG_CMD_PRINTF3("INSERT INTO tmp_pg_collation VALUES (E'%s', E'%s', %d);\n",
                           escape_quotes(alias), quoted_locale, enc);
    }

    /* Add an SQL-standard name */
    PG_CMD_PRINTF1("INSERT INTO tmp_pg_collation VALUES ('ucs_basic', 'C', %d);\n", PG_UTF8);

    /*
     * When copying collations to the final location, eliminate aliases that
     * conflict with an existing locale name for the same encoding.  For
     * example, "br_FR.iso88591" is normalized to "br_FR", both for encoding
     * LATIN1.    But the unnormalized locale "br_FR" already exists for LATIN1.
     * Prefer the alias that matches the OS locale name, else the first locale
     * name by sort order (arbitrary choice to be deterministic).
     *
     * Also, eliminate any aliases that conflict with pg_collation's
     * hard-wired entries for "C" etc.
     */
    PG_CMD_PUTS("INSERT INTO pg_collation (collname, collnamespace, collowner, collencoding, collcollate, collctype) "
                " SELECT DISTINCT ON (collname, encoding)"
                "   collname, "
                "   (SELECT oid FROM pg_namespace WHERE nspname = 'pg_catalog') AS collnamespace, "
                "   (SELECT relowner FROM pg_class WHERE relname = 'pg_collation') AS collowner, "
                "   encoding, locale, locale "
                "  FROM tmp_pg_collation"
                "  WHERE NOT EXISTS (SELECT 1 FROM pg_collation WHERE collname = tmp_pg_collation.collname)"
       "  ORDER BY collname, encoding, (collname = locale) DESC, locale;\n");

    pclose(locale_a_handle);
    PG_CMD_CLOSE;

    check_ok();
    if (count == 0 && !debug)
    {
        printf(_("No usable system locales were found.\n"));
        printf(_("Use the option \"--debug\" to see details.\n"));
    }
#else                            /* not HAVE_LOCALE_T && not WIN32 */
    printf(_("not supported on this platform\n"));
    fflush(stdout);
#endif   /* not HAVE_LOCALE_T  && not WIN32 */
}

In essence, setup_collation() populates the pg_collation catalog: it reads the output of "locale -a", skips locales it cannot use (names that are too long, contain non-ASCII characters, or map to client-only or SQL_ASCII encodings), stages the remaining locales plus normalized aliases such as "en_US" for "en_US.utf8" in a temporary table, and finally copies the de-duplicated rows into pg_collation.
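
To see how the final INSERT ... SELECT DISTINCT ON statement keeps exactly one row per (collname, encoding), here is a minimal sketch that can be run directly in psql. The sample rows are made up for illustration (the br_FR case is the one mentioned in the source comment); the temporary table and the ORDER BY logic mirror the statement in the code above.

-- Stage a raw locale, the alias produced by normalize_locale_name(),
-- and a raw locale that happens to collide with that alias.
CREATE TEMP TABLE tmp_pg_collation (collname name, locale name, encoding int);

INSERT INTO tmp_pg_collation VALUES
    ('br_FR.iso88591', 'br_FR.iso88591', 8),  -- raw OS locale, LATIN1 (encoding id 8)
    ('br_FR',          'br_FR.iso88591', 8),  -- alias generated from it
    ('br_FR',          'br_FR',          8);  -- raw OS locale that collides with the alias

-- Keep one row per (collname, encoding); (collname = locale) DESC prefers the
-- row whose collation name matches the OS locale name, so ('br_FR', 'br_FR')
-- wins over the alias row, while 'br_FR.iso88591' is kept unchanged.
SELECT DISTINCT ON (collname, encoding)
       collname, locale, encoding
  FROM tmp_pg_collation
 ORDER BY collname, encoding, (collname = locale) DESC, locale;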

As a side note, the contents of pg_collation look roughly like this:

pgsql=# \x
Expanded display is on.
pgsql=# select * from pg_collation limit 10;
-[ RECORD 1 ]-+-----------------
collname      | default
collnamespace | 11
collowner     | 10
collencoding  | -1
collcollate   | 
collctype     | 
-[ RECORD 2 ]-+-----------------
collname      | C
collnamespace | 11
collowner     | 10
collencoding  | -1
collcollate   | C
collctype     | C
-[ RECORD 3 ]-+-----------------
collname      | POSIX
collnamespace | 11
collowner     | 10
collencoding  | -1
collcollate   | POSIX
collctype     | POSIX
-[ RECORD 4 ]-+-----------------
collname      | aa_DJ
collnamespace | 11
collowner     | 10
collencoding  | 6
collcollate   | aa_DJ.utf8
collctype     | aa_DJ.utf8
-[ RECORD 5 ]-+-----------------
collname      | aa_DJ
collnamespace | 11
collowner     | 10
collencoding  | 8
collcollate   | aa_DJ
collctype     | aa_DJ
-[ RECORD 6 ]-+-----------------
collname      | aa_DJ.iso88591
collnamespace | 11
collowner     | 10
collencoding  | 8
collcollate   | aa_DJ.iso88591
collctype     | aa_DJ.iso88591
-[ RECORD 7 ]-+-----------------
collname      | aa_DJ.utf8
collnamespace | 11
collowner     | 10
collencoding  | 6
collcollate   | aa_DJ.utf8
collctype     | aa_DJ.utf8
-[ RECORD 8 ]-+-----------------
collname      | aa_ER
collnamespace | 11
collowner     | 10
collencoding  | 6
collcollate   | aa_ER
collctype     | aa_ER
-[ RECORD 9 ]-+-----------------
collname      | aa_ER.utf8
collnamespace | 11
collowner     | 10
collencoding  | 6
collcollate   | aa_ER.utf8
collctype     | aa_ER.utf8
-[ RECORD 10 ]+-----------------
collname      | aa_ER.utf8@saaho
collnamespace | 11
collowner     | 10
collencoding  | 6
collcollate   | aa_ER.utf8@saaho
collctype     | aa_ER.utf8@saaho

pgsql=#  
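
In this catalog, collencoding stores PostgreSQL's internal encoding IDs (6 is UTF8 and 8 is LATIN1, corresponding to PG_UTF8 and PG_LATIN1 in the source), while -1 marks collations such as "default", "C" and "POSIX" that are valid for any encoding. As a small convenience sketch, the built-in function pg_encoding_to_char() can translate the numeric IDs back into encoding names:

-- Show each collation together with the name of its encoding,
-- skipping the encoding-independent entries (collencoding = -1).
SELECT collname, collcollate, pg_encoding_to_char(collencoding) AS encoding
  FROM pg_collation
 WHERE collencoding >= 0
 ORDER BY collname
 LIMIT 10;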