PostgreSQL作为全球最先进的开源关系型数据库管理系统,凭借其强大的功能、卓越的扩展性、高度的SQL标准兼容性以及活跃的社区生态,被誉为“开源数据库之王”。它不仅支持传统的关系型数据模型,还提供了JSON、XML、全文搜索、地理空间等非关系型功能,成为开发者和企业的首选数据库之一。本文将系统全面地梳理PostgreSQL的核心知识点,从基础概念到高级特性,帮助初学者建立完整的知识体系,也为有经验的DBA和开发者提供深入的技术参考。
一、PostgreSQL基础
1.1 PostgreSQL概述
PostgreSQL是一个功能强大的开源对象-关系型数据库系统,具有30多年的活跃开发历史,以其可靠性、功能健壮性和性能著称。
核心特性:
完全遵守ACID原则
支持多种高级数据类型(JSON、数组、范围类型等)
强大的并发控制(MVCC)
丰富的索引类型(B-tree、Hash、GiST、SP-GiST、GIN、BRIN)
完整的SQL标准支持(SQL:2016兼容)
强大的扩展机制(支持自定义数据类型、操作符、函数)
多种复制和高可用方案(流复制、逻辑复制)
地理空间数据支持(PostGIS扩展)
1.2 安装与配置
Linux安装(Ubuntu/Debian):
# 更新包管理器
sudo apt update
# 安装PostgreSQL
sudo apt install postgresql postgresql-contrib
# 启动服务
sudo systemctl start postgresql
sudo systemctl enable postgresql
# 查看状态
sudo systemctl status postgresql
# 切换至postgres用户
sudo -i -u postgres
# 连接数据库
psql
# 设置postgres用户密码
ALTER USER postgres WITH PASSWORD 'strong_password';
# 退出psql
\q
Linux安装(RHEL/CentOS):
# 安装PostgreSQL仓库
dnf install -y https://download.postgresql.org/pub/repos/yum/reporpms/EL-8-x86_64/pgdg-redhat-repo-latest.noarch.rpm
# 安装PostgreSQL
dnf install -y postgresql16-server postgresql16-contrib
# 初始化数据库
/usr/pgsql-16/bin/postgresql-16-setup initdb
# 启动服务
systemctl enable postgresql-16
systemctl start postgresql-16
配置文件位置:
主配置文件:/etc/postgresql/16/main/postgresql.conf
客户端认证文件:/etc/postgresql/16/main/pg_hba.conf
服务控制:pg_ctl
修改配置示例:
# 编辑postgresql.conf
sudo nano /etc/postgresql/16/main/postgresql.conf
# 修改监听地址
listen_addresses = '*'
port = 5432
max_connections = 200
# 修改内存配置
shared_buffers = 256MB
effective_cache_size = 1GB
work_mem = 16MB
maintenance_work_mem = 128MB
# 修改日志配置
logging_collector = on
log_directory = 'log'
log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'
log_statement = 'all'
# 编辑pg_hba.conf
sudo nano /etc/postgresql/16/main/pg_hba.conf
# 添加允许远程连接
host all all 0.0.0.0/0 md5
# 重启服务
sudo systemctl restart postgresql
1.3 PostgreSQL体系结构
进程架构:
Postmaster(主进程)
├── Logger(日志进程)
├── Writer(写入进程)
├── WalWriter(预写日志进程)
├── Checkpointer(检查点进程)
├── Autovacuum(自动清理进程)
├── StatsCollector(统计收集进程)
└── Backend Processes(客户端连接进程)
存储结构:
数据库集群(Data Directory)
├── base/ # 数据库文件
│ ├── 1/ # 数据库OID目录
│ │ ├── 1255 # 表文件
│ │ ├── 1255_fsm # 空闲空间映射
│ │ └── 1255_vm # 可见性映射
├── global/ # 全局系统表
├── pg_wal/ # WAL日志
├── pg_xact/ # 事务状态
├── pg_log/ # 日志文件
└── postgresql.conf # 配置文件
-- 查看数据库目录
SHOW data_directory;
-- 查看数据库OID
SELECT oid, datname FROM pg_database;
-- 查看表文件信息
SELECT relname, relfilenode, relpages, reltuples
FROM pg_class
WHERE relname = 'employees';
二、SQL基础
2.1 数据类型


-- 创建表示例
CREATE TABLE employees (
employee_id SERIAL PRIMARY KEY,
first_name VARCHAR(50) NOT NULL,
last_name VARCHAR(50) NOT NULL,
email VARCHAR(100) UNIQUE,
birth_date DATE,
hire_date TIMESTAMPTZ DEFAULT NOW(),
salary NUMERIC(10,2) CHECK (salary > 0),
department_id INTEGER,
is_active BOOLEAN DEFAULT TRUE,
tags TEXT[],
metadata JSONB,
phone_numbers TEXT[],
ip_address INET,
work_time INTERVAL
);
-- 数组类型使用
INSERT INTO employees (first_name, last_name, tags)
VALUES ('张三', '李四', ARRAY['developer', 'senior']);
SELECT * FROM employees WHERE tags @> ARRAY['developer'];
-- JSONB类型使用
INSERT INTO employees (first_name, last_name, metadata)
VALUES ('王五', '赵六', '{"skills": ["SQL", "Python"], "level": "advanced"}');
SELECT * FROM employees WHERE metadata @> '{"level": "advanced"}';
SELECT metadata->>'skills' FROM employees;
2.2 表操作
-- 创建表
CREATE TABLE departments (
department_id SERIAL PRIMARY KEY,
department_name VARCHAR(100) NOT NULL,
location VARCHAR(100),
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- 临时表(会话级)
CREATE TEMP TABLE temp_employees AS
SELECT * FROM employees WHERE department_id = 10;
-- 临时表(事务级)
CREATE TEMP TABLE temp_data (id INT, name TEXT) ON COMMIT DROP;
-- 创建表时添加约束
CREATE TABLE products (
product_id SERIAL PRIMARY KEY,
product_code VARCHAR(20) NOT NULL UNIQUE,
product_name VARCHAR(200) NOT NULL,
price NUMERIC(10,2) DEFAULT 0,
category_id INTEGER REFERENCES categories(category_id),
description TEXT,
CONSTRAINT price_positive CHECK (price >= 0)
);
-- 修改表
ALTER TABLE employees ADD COLUMN middle_name VARCHAR(50);
ALTER TABLE employees ALTER COLUMN salary TYPE NUMERIC(12,2);
ALTER TABLE employees RENAME COLUMN first_name TO given_name;
ALTER TABLE employees DROP COLUMN middle_name CASCADE;
ALTER TABLE employees SET SCHEMA new_schema;
-- 重命名表
ALTER TABLE employees RENAME TO staff;
-- 删除表
DROP TABLE IF EXISTS temp_employees CASCADE;
-- 清空表
TRUNCATE TABLE temp_employees RESTART IDENTITY;
2.3 约束
-- 主键约束
ALTER TABLE employees ADD PRIMARY KEY (employee_id);
ALTER TABLE employees ADD CONSTRAINT pk_employees PRIMARY KEY (employee_id);
-- 外键约束
ALTER TABLE employees ADD CONSTRAINT fk_department
FOREIGN KEY (department_id) REFERENCES departments(department_id)
ON DELETE CASCADE
ON UPDATE CASCADE;
-- 唯一约束
ALTER TABLE employees ADD CONSTRAINT unique_email UNIQUE (email);
CREATE UNIQUE INDEX idx_unique_email ON employees (email);
-- 检查约束
ALTER TABLE employees ADD CONSTRAINT check_salary
CHECK (salary >= 0 AND salary <= 1000000);
-- NOT NULL约束
ALTER TABLE employees ALTER COLUMN first_name SET NOT NULL;
-- 默认值
ALTER TABLE employees ALTER COLUMN is_active SET DEFAULT TRUE;
-- 排除约束
CREATE TABLE schedules (
room_id INTEGER,
time_range TSRANGE,
EXCLUDE USING gist (room_id WITH =, time_range WITH &&)
);
2.4 基本查询
-- SELECT查询
SELECT
employee_id,
first_name || ' ' || last_name AS full_name,
email,
salary
FROM employees
WHERE department_id IN (SELECT department_id FROM departments WHERE location = 'Beijing')
AND salary BETWEEN 5000 AND 10000
AND hire_date >= '2023-01-01'
AND first_name LIKE '张%'
AND email IS NOT NULL
ORDER BY salary DESC, hire_date ASC
LIMIT 10 OFFSET 20;
-- 分页查询(标准)
SELECT * FROM employees
ORDER BY employee_id
LIMIT 10 OFFSET 20;
-- 分页查询(高效,用于深分页)
SELECT * FROM employees
WHERE employee_id > (SELECT employee_id FROM employees ORDER BY employee_id LIMIT 1 OFFSET 200)
ORDER BY employee_id
LIMIT 20;
-- 聚合查询
SELECT
department_id,
COUNT(*) AS emp_count,
AVG(salary) AS avg_salary,
SUM(salary) AS total_salary,
MAX(salary) AS max_salary,
MIN(salary) AS min_salary,
STRING_AGG(first_name, ', ') AS employees
FROM employees
WHERE is_active = true
GROUP BY department_id
HAVING COUNT(*) > 5
ORDER BY avg_salary DESC;
-- GROUPING SETS
SELECT
department_id,
job_title,
COUNT(*)
FROM employees
GROUP BY GROUPING SETS ((department_id, job_title), (department_id), ());
三、高级查询
3.1 连接查询
-- INNER JOIN
SELECT e.employee_id, e.first_name, d.department_name
FROM employees e
INNER JOIN departments d ON e.department_id = d.department_id;
-- LEFT JOIN
SELECT e.employee_id, e.first_name, d.department_name
FROM employees e
LEFT JOIN departments d ON e.department_id = d.department_id;
-- RIGHT JOIN
SELECT e.employee_id, e.first_name, d.department_name
FROM employees e
RIGHT JOIN departments d ON e.department_id = d.department_id;
-- FULL OUTER JOIN
SELECT e.employee_id, e.first_name, d.department_name
FROM employees e
FULL OUTER JOIN departments d ON e.department_id = d.department_id;
-- CROSS JOIN
SELECT e.first_name, d.department_name
FROM employees e
CROSS JOIN departments d;
-- 自然连接
SELECT * FROM employees NATURAL JOIN departments;
-- 自连接
SELECT
e1.first_name AS employee,
e2.first_name AS manager
FROM employees e1
LEFT JOIN employees e2 ON e1.manager_id = e2.employee_id;
-- LATERAL连接
SELECT e.employee_id, e.first_name, latest_order.*
FROM employees e
LEFT JOIN LATERAL (
SELECT order_id, order_date, amount
FROM orders o
WHERE o.employee_id = e.employee_id
ORDER BY order_date DESC
LIMIT 3
) latest_order ON true;
3.2 子查询与CTE
-- 标量子查询
SELECT
first_name,
salary,
(SELECT AVG(salary) FROM employees) AS avg_salary,
salary - (SELECT AVG(salary) FROM employees) AS diff_from_avg
FROM employees;
-- IN子查询
SELECT first_name, last_name
FROM employees
WHERE department_id IN (
SELECT department_id
FROM departments
WHERE location = 'Beijing'
);
-- EXISTS子查询
SELECT first_name, last_name
FROM employees e
WHERE EXISTS (
SELECT 1
FROM orders o
WHERE o.employee_id = e.employee_id
AND o.order_date > '2023-01-01'
);
-- 派生表
SELECT department_id, avg_salary
FROM (
SELECT department_id, AVG(salary) AS avg_salary
FROM employees
WHERE is_active = true
GROUP BY department_id
) AS dept_stats
WHERE avg_salary > 8000;
-- CTE(公用表表达式)
WITH dept_stats AS (
SELECT
department_id,
AVG(salary) AS avg_salary,
COUNT(*) AS emp_count
FROM employees
GROUP BY department_id
)
SELECT d.department_name, ds.avg_salary, ds.emp_count
FROM departments d
INNER JOIN dept_stats ds ON d.department_id = ds.department_id;
-- 递归CTE
WITH RECURSIVE org_chart AS (
-- 锚点成员:根节点
SELECT employee_id, first_name, manager_id, 0 AS level
FROM employees
WHERE manager_id IS NULL
UNION ALL
-- 递归成员:子节点
SELECT e.employee_id, e.first_name, e.manager_id, o.level + 1
FROM employees e
INNER JOIN org_chart o ON e.manager_id = o.employee_id
)
SELECT * FROM org_chart ORDER BY level, employee_id;
-- 递归CTE(树形路径)
WITH RECURSIVE category_tree AS (
SELECT category_id, category_name, parent_id,
ARRAY[category_name] AS path
FROM categories
WHERE parent_id IS NULL
UNION ALL
SELECT c.category_id, c.category_name, c.parent_id,
ct.path || c.category_name
FROM categories c
INNER JOIN category_tree ct ON c.parent_id = ct.category_id
)
SELECT * FROM category_tree;
3.3 窗口函数
-- 基础窗口函数
SELECT
employee_id,
first_name,
department_id,
salary,
ROW_NUMBER() OVER (ORDER BY salary DESC) AS row_num,
RANK() OVER (ORDER BY salary DESC) AS rank,
DENSE_RANK() OVER (ORDER BY salary DESC) AS dense_rank,
PERCENT_RANK() OVER (ORDER BY salary DESC) AS percent_rank,
CUME_DIST() OVER (ORDER BY salary DESC) AS cume_dist
FROM employees;
-- 分组窗口函数
SELECT
employee_id,
first_name,
department_id,
salary,
ROW_NUMBER() OVER (PARTITION BY department_id ORDER BY salary DESC) AS dept_rank,
AVG(salary) OVER (PARTITION BY department_id) AS dept_avg_salary,
salary - AVG(salary) OVER (PARTITION BY department_id) AS diff_from_dept_avg
FROM employees;
-- 窗口函数:LAG/LEAD
SELECT
employee_id,
first_name,
salary,
LAG(salary, 1, 0) OVER (ORDER BY salary) AS prev_salary,
LEAD(salary, 1, 0) OVER (ORDER BY salary) AS next_salary,
salary - LAG(salary, 1, 0) OVER (ORDER BY salary) AS salary_increase
FROM employees;
-- 窗口函数:FIRST_VALUE/LAST_VALUE
SELECT
employee_id,
first_name,
department_id,
salary,
FIRST_VALUE(first_name) OVER (PARTITION BY department_id ORDER BY salary DESC) AS highest_paid_employee,
LAST_VALUE(first_name) OVER (PARTITION BY department_id ORDER BY salary DESC
RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS lowest_paid_employee
FROM employees;
-- 窗口聚合
SELECT
employee_id,
first_name,
hire_date,
salary,
SUM(salary) OVER (ORDER BY hire_date) AS running_total,
AVG(salary) OVER (ORDER BY hire_date ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS moving_avg
FROM employees;
-- NTILE分桶
SELECT
employee_id,
first_name,
salary,
NTILE(4) OVER (ORDER BY salary) AS quartile
FROM employees;