跳转至

02 - 数据序列化

学习时间: 1.5小时 | 重要性: ⭐⭐⭐⭐⭐(数据持久化必备)


🎯 学习目标

  • 掌握JSON的读写
  • 理解pickle的使用场景
  • 学会处理CSV文件

📦 JSON - 最常用的数据格式

基本操作

Python
import json
from pathlib import Path

# Convert a Python object to a JSON string
data = {
    "name": "张三",
    "age": 25,
    "scores": [85, 90, 78],
    "passed": True
}

# Serialize to a JSON string. ensure_ascii=False keeps non-ASCII text
# readable instead of emitting \uXXXX escapes; indent=2 pretty-prints.
json_str = json.dumps(data, ensure_ascii=False, indent=2)  # json.dumps() turns a Python object into a JSON string
print(json_str)

# Parse the JSON string back into Python objects
parsed = json.loads(json_str)
print(parsed["name"])

# Read and write the file directly through pathlib
Path("data.json").write_text(json_str, encoding="utf-8")
content = Path("data.json").read_text(encoding="utf-8")
data = json.loads(content)

实用技巧

Python
# 处理日期时间
from datetime import datetime

def json_serial(obj):
    """Fallback serializer intended for ``json.dumps(..., default=json_serial)``.

    Args:
        obj: The object to convert.

    Returns:
        The ISO 8601 string for ``obj`` when it is a ``datetime``.

    Raises:
        TypeError: If ``obj`` is not a ``datetime``.
    """
    if not isinstance(obj, datetime):
        raise TypeError(f"Type {type(obj)} not serializable")
    return obj.isoformat()

# A datetime value cannot be serialized by json on its own
data = {
    "timestamp": datetime.now(),
    "value": 42
}

# default= is called for every object json cannot serialize by itself
json_str = json.dumps(data, default=json_serial)

# Use a custom encoder
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that writes ``datetime`` values as ISO 8601 strings."""

    def default(self, obj):
        """Return a serializable form of ``obj``.

        Anything that is not a ``datetime`` is delegated to the base
        class, which raises ``TypeError`` for unsupported types.
        """
        if not isinstance(obj, datetime):
            return super().default(obj)
        return obj.isoformat()

# Pass the encoder class through cls= so its default() handles datetime values
json_str = json.dumps(data, cls=DateTimeEncoder)

配置文件管理

Python
class Config:
    """Minimal JSON-backed configuration store.

    The settings live in ``self.data`` and are written back to
    ``self.filepath`` on every ``set`` call.
    """

    def __init__(self, filepath="config.json"):
        """Bind the store to *filepath* and load it immediately."""
        self.filepath = Path(filepath)
        self.data = {}
        self.load()

    def load(self):
        """Read the settings from disk.

        When the file does not exist yet, the defaults are used and
        written out so the file exists afterwards.
        """
        if not self.filepath.exists():
            self.data = self.get_defaults()
            self.save()
            return

        raw = self.filepath.read_text(encoding="utf-8")
        self.data = json.loads(raw)

    def save(self):
        """Write the current settings to disk as indented UTF-8 JSON."""
        serialized = json.dumps(self.data, ensure_ascii=False, indent=2)
        self.filepath.write_text(serialized, encoding="utf-8")

    def get(self, key, default=None):
        """Return the value stored under *key*, or *default* if absent."""
        return self.data.get(key, default)

    def set(self, key, value):
        """Store *value* under *key* and persist the change right away."""
        self.data[key] = value
        self.save()

    @staticmethod  # a static method can be called without an instance
    def get_defaults():
        """Return a fresh dict holding the default settings."""
        defaults = {"debug": False, "max_retries": 3}
        return defaults

# Usage: when config.json does not exist, the defaults are written on construction
config = Config()
config.set("api_key", "xxx")  # saved to disk immediately
print(config.get("debug"))

🥒 Pickle - Python对象序列化

Python
import pickle

# Save a Python object
data = {
    "model": {"weights": [1, 2, 3]},
    "metadata": {"version": "1.0"}
}

# pickle is a binary format, so the file is opened in "wb" mode
with open("model.pkl", "wb") as f:
    pickle.dump(data, f)

# Load the Python object back ("rb" = binary read)
with open("model.pkl", "rb") as f:
    loaded = pickle.load(f)

# ⚠️ Security warning: never unpickle data you do not trust!
# Unpickling can execute arbitrary code, so only use it on your own data

何时用pickle?

  • 保存机器学习模型
  • 临时缓存Python对象
  • 进程间通信

何时不用pickle?

  • 需要跨语言:✅ 用JSON
  • 需要可读性:✅ 用JSON
  • 数据交换:✅ 用JSON


📊 CSV - 表格数据处理

Python
import csv
from pathlib import Path

# Read CSV
def read_csv(filepath):
    """Read a UTF-8 CSV file into a list of row dictionaries.

    The first line is used as the header. No type conversion is
    performed, so cell values are returned as strings.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        A list with one dict per data row, keyed by the header names.
    """
    # newline="" hands line-ending handling to the csv module, so line
    # breaks embedded in quoted fields are preserved unchanged.
    with open(filepath, "r", encoding="utf-8", newline="") as f:
        return list(csv.DictReader(f))  # each row is read as a dict

# Write CSV
def write_csv(data, filepath):
    """Write a list of row dictionaries to a UTF-8 CSV file.

    The header is the union of the keys of all rows, in first-seen
    order, so rows may carry different keys; cells a row does not
    provide are left empty.

    Args:
        data: Sequence of dicts, one per row. When it is empty nothing
            is written and no file is created.
        filepath: Destination path; an existing file is overwritten.
    """
    if not data:
        return

    # dict.fromkeys acts as an ordered set: it removes duplicate keys
    # while keeping the order in which they first appear.
    fieldnames = list(dict.fromkeys(key for row in data for key in row))

    # newline="" prevents extra blank lines between rows on Windows.
    with open(filepath, "w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

# Usage
data = [
    {"name": "张三", "age": 25, "city": "北京"},
    {"name": "李四", "age": 30, "city": "上海"}
]

write_csv(data, "output.csv")
loaded = read_csv("output.csv")  # read_csv does no type conversion on the cells

💡 实用场景

数据导出为多种格式

Python
def export_data(data, filepath, format="json"):
    """Export *data* to *filepath* in the requested format.

    Args:
        data: The object to export. For ``"csv"`` it is passed to
            ``write_csv``; for ``"json"`` it must be JSON-serializable.
        filepath: Destination path; an existing file is overwritten.
        format: One of ``"json"``, ``"csv"`` or ``"pickle"``.

    Raises:
        ValueError: If *format* is not one of the supported names.
    """
    path = Path(filepath)

    if format == "json":
        path.write_text(
            json.dumps(data, ensure_ascii=False, indent=2),
            encoding="utf-8"
        )
    elif format == "csv":
        write_csv(data, filepath)
    elif format == "pickle":
        with open(filepath, "wb") as f:
            pickle.dump(data, f)
    else:
        # Fail loudly instead of returning as if the export had succeeded.
        raise ValueError(f"Unsupported export format: {format!r}")

# Usage: export the same records as JSON and as CSV
data = [{"name": "张三", "age": 25}]
export_data(data, "data.json", "json")
export_data(data, "data.csv", "csv")

📝 练习

练习1: JSON数据库

Python
import json
from pathlib import Path

class JSONDatabase:
    """Tiny key-value store persisted as a single JSON file.

    The whole mapping lives in ``self.data`` and is rewritten to disk
    after every successful create, update or delete.
    """

    def __init__(self, filepath):
        """Open the store at *filepath*; it starts empty if the file is absent."""
        self.filepath = Path(filepath)
        self.data = self._load()

    def _load(self):
        """Return the mapping stored on disk, or ``{}`` when no file exists."""
        if not self.filepath.exists():
            return {}
        raw = self.filepath.read_text(encoding='utf-8')
        return json.loads(raw)

    def _save(self):
        """Write the current mapping to disk as indented UTF-8 JSON."""
        payload = json.dumps(self.data, ensure_ascii=False, indent=2)
        self.filepath.write_text(payload, encoding='utf-8')

    def _require(self, key):
        """Raise ``KeyError`` unless *key* is already stored."""
        if key not in self.data:
            raise KeyError(f"Key '{key}' not found")

    def create(self, key, value):
        """Add a new record.

        Raises:
            KeyError: If *key* is already present.
        """
        if key in self.data:
            raise KeyError(f"Key '{key}' already exists")
        self.data[key] = value
        self._save()

    def read(self, key):
        """Return the record stored under *key*, or ``None`` if there is none."""
        return self.data.get(key)

    def update(self, key, value):
        """Replace an existing record.

        Raises:
            KeyError: If *key* is not present.
        """
        self._require(key)
        self.data[key] = value
        self._save()

    def delete(self, key):
        """Remove an existing record.

        Raises:
            KeyError: If *key* is not present.
        """
        self._require(key)
        self.data.pop(key)
        self._save()

    def list_all(self):
        """Return the internal mapping of all records (not a copy)."""
        return self.data

# Usage example: create, read, update and delete one record
db = JSONDatabase("mydb.json")
db.create("user1", {"name": "张三", "age": 25})
print(db.read("user1"))
db.update("user1", {"name": "张三", "age": 26})
db.delete("user1")

练习2: CSV转JSON

Python
import csv
import json
from pathlib import Path

def _coerce_number(value):
    """Return *value* as an int or finite float when it parses as one.

    Anything else is returned unchanged, including non-string values.
    """
    import math

    if not isinstance(value, str):
        # DictReader stores None for cells a short row is missing and a
        # list for surplus cells; neither can be parsed as a number.
        return value
    try:
        return int(value)
    except ValueError:
        pass
    try:
        number = float(value)
    except ValueError:
        return value
    # Text such as "nan" or "inf" parses as a float, but json.dumps would
    # emit NaN/Infinity, which is not valid JSON. Keep it as text.
    return number if math.isfinite(number) else value


def csv_to_json(csv_path, json_path):
    """Convert a UTF-8 CSV file into a JSON array of row objects.

    Cells that look like numbers are converted to int or float; note
    that this also turns text such as "007" into the number 7. All
    other cells stay strings, and cells missing from a short row
    become null.

    Args:
        csv_path: Path of the CSV file to read; its first line is the header.
        json_path: Path of the JSON file to write (overwritten if present).
    """
    # newline="" hands line-ending handling to the csv module.
    with open(csv_path, 'r', encoding='utf-8', newline='') as f:
        data = [
            {key: _coerce_number(value) for key, value in row.items()}
            for row in csv.DictReader(f)
        ]

    Path(json_path).write_text(
        json.dumps(data, ensure_ascii=False, indent=2),
        encoding='utf-8'
    )
    print(f"Converted {len(data)} records to {json_path}")

# Create a test CSV file
test_csv = """name,age,city
张三,25,北京
李四,30,上海
王五,28,广州"""

# Write the fixture to disk, then convert it to JSON
Path("test.csv").write_text(test_csv, encoding='utf-8')
csv_to_json("test.csv", "test.json")

练习3: 配置版本管理

Python
import json
import shutil
from pathlib import Path
from datetime import datetime

class VersionedConfig:
    """JSON configuration file that backs up the previous file on every update.

    The data has the shape ``{"version": int, "settings": {...}}``. Each
    ``update`` copies the current file into the backup directory, bumps
    the version number and rewrites the file.
    """

    def __init__(self, filepath="config.json", backup_dir="config_backups"):
        """Bind to *filepath*, make sure *backup_dir* exists, and load the data."""
        self.filepath = Path(filepath)
        self.backup_dir = Path(backup_dir)
        # parents=True so a nested backup location works on first use.
        self.backup_dir.mkdir(parents=True, exist_ok=True)
        self.data = self._load()

    def _load(self):
        """Return the stored configuration, or a fresh version-1 one.

        Raises:
            ValueError: If the file does not contain a JSON object.
        """
        if not self.filepath.exists():
            return {"version": 1, "settings": {}}

        loaded = json.loads(self.filepath.read_text(encoding='utf-8'))
        if not isinstance(loaded, dict):
            raise ValueError(f"{self.filepath} must contain a JSON object")
        # A file written by something else may lack these keys; fill them
        # in so update() and get() do not fail with KeyError.
        loaded.setdefault("version", 1)
        loaded.setdefault("settings", {})
        return loaded

    def _save(self):
        """Write the current data to disk as indented UTF-8 JSON."""
        self.filepath.write_text(
            json.dumps(self.data, ensure_ascii=False, indent=2),
            encoding='utf-8'
        )

    def _backup(self):
        """Copy the current file into the backup directory, if it exists."""
        if self.filepath.exists():
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            backup_path = self.backup_dir / f"config_v{self.data['version']}_{timestamp}.json"
            shutil.copy(self.filepath, backup_path)
            print(f"Backup created: {backup_path}")

    def update(self, key, value):
        """Set one setting, creating a backup and a new version."""
        self._backup()
        self.data['version'] += 1
        self.data['settings'][key] = value
        self.data['last_updated'] = datetime.now().isoformat()
        self._save()
        print(f"Updated to version {self.data['version']}")

    def get(self, key, default=None):
        """Return the setting stored under *key*, or *default* if absent."""
        return self.data['settings'].get(key, default)

    @staticmethod
    def _backup_sort_key(path):
        """Sort key that orders backups by version number, then by name."""
        # Backup names look like config_v<version>_<timestamp>.json
        version_token = path.name[len("config_v"):].split("_", 1)[0]
        try:
            version = int(version_token)
        except ValueError:
            # Files matching the glob without a numeric version sort first.
            version = -1
        return (version, path.name)

    def list_versions(self):
        """Return the backup file names, oldest version first.

        Versions are compared numerically, so v2 comes before v10.
        """
        backups = sorted(
            self.backup_dir.glob("config_v*.json"),
            key=self._backup_sort_key,
        )
        return [b.name for b in backups]

# Usage: each update() call creates a new version
config = VersionedConfig()
config.update("theme", "dark")
config.update("language", "zh-CN")
print(config.list_versions())

🎯 自我检查

  • 掌握JSON的基本读写操作
  • 能处理JSON中的日期时间等特殊类型
  • 理解pickle的使用场景和安全性问题
  • 能读写CSV文件
  • 能实现简单的配置管理功能

📚 延伸阅读


下一步: 03 - 日期时间处理