02 - 数据序列化¶
学习时间: 1.5小时 重要性: ⭐⭐⭐⭐⭐ 数据持久化必备
🎯 学习目标¶
- 掌握JSON的读写
- 理解pickle的使用场景
- 学会处理CSV文件
📦 JSON - 最常用的数据格式¶
基本操作¶
Python
import json
from pathlib import Path
# A plain Python object built only from JSON-compatible types.
data = {
    "name": "张三",
    "age": 25,
    "scores": [85, 90, 78],
    "passed": True,
}

# Serialize to text: ensure_ascii=False keeps non-ASCII characters readable,
# indent=2 pretty-prints the result.
json_str = json.dumps(data, indent=2, ensure_ascii=False)
print(json_str)

# Parse the text back into Python objects.
parsed = json.loads(json_str)
print(parsed["name"])

# Round-trip through a file on disk.
json_file = Path("data.json")
json_file.write_text(json_str, encoding="utf-8")
content = json_file.read_text(encoding="utf-8")
data = json.loads(content)
实用技巧¶
Python
# 处理日期时间
from datetime import datetime
def json_serial(obj):
    """Fallback serializer for ``json.dumps(..., default=json_serial)``.

    Converts ``datetime``, ``date`` and ``time`` objects to their ISO 8601
    string form.

    Args:
        obj: The object ``json.dumps`` could not encode on its own.

    Returns:
        The ``isoformat()`` string of *obj*.

    Raises:
        TypeError: If *obj* is not a supported date/time object.
    """
    # Imported locally so the helper works no matter which names the
    # surrounding code imported from ``datetime``.
    from datetime import date, time

    # ``datetime`` is a subclass of ``date``, so this check covers it too.
    if isinstance(obj, (date, time)):
        return obj.isoformat()
    raise TypeError(f"Type {type(obj)} not serializable")
# Sample payload holding a value the json module cannot encode natively.
data = {"timestamp": datetime.now(), "value": 42}

# json.dumps hands every object it cannot encode to the ``default`` callable.
json_str = json.dumps(
    data,
    default=json_serial,
)
# 使用自定义编码器
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that writes date/time objects as ISO 8601 strings.

    Use it with ``json.dumps(obj, cls=DateTimeEncoder)``.
    """

    def default(self, obj):
        """Return a JSON-compatible stand-in for *obj*.

        ``datetime``, ``date`` and ``time`` objects become their
        ``isoformat()`` string; anything else is delegated to the base class,
        which raises ``TypeError``.
        """
        # Imported locally so the class works no matter which names the
        # surrounding code imported from ``datetime``.
        from datetime import date, time

        # ``datetime`` is a subclass of ``date``, so this check covers it too.
        if isinstance(obj, (date, time)):
            return obj.isoformat()
        return super().default(obj)
# Passing cls= tells json.dumps which encoder class to use for this call.
json_str = json.dumps(
    data,
    cls=DateTimeEncoder,
)
配置文件管理¶
Python
class Config:
    """Minimal JSON-backed configuration store.

    Settings are kept in ``self.data`` (a dict) and written back to
    ``self.filepath`` every time a value is set.
    """

    def __init__(self, filepath="config.json"):
        """Load the configuration at *filepath*, creating it if it is missing."""
        self.filepath = Path(filepath)
        self.data = {}
        self.load()

    def load(self):
        """Read settings from disk, or create the file from the defaults."""
        if self.filepath.exists():
            self.data = json.loads(self.filepath.read_text(encoding="utf-8"))
        else:
            self.data = self.get_defaults()
            self.save()

    def save(self):
        """Write the current settings to ``self.filepath``.

        Missing parent directories are created. The JSON is written to a
        sibling ``*.tmp`` file first and then renamed over the target, so an
        interrupted write cannot leave a truncated configuration behind.
        """
        payload = json.dumps(self.data, ensure_ascii=False, indent=2)
        self.filepath.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = self.filepath.with_name(self.filepath.name + ".tmp")
        tmp_path.write_text(payload, encoding="utf-8")
        tmp_path.replace(self.filepath)

    def get(self, key, default=None):
        """Return the value stored under *key*, or *default* if it is absent."""
        return self.data.get(key, default)

    def set(self, key, value):
        """Store *value* under *key* and persist the change immediately."""
        self.data[key] = value
        self.save()

    @staticmethod  # a static method needs no instance to be called
    def get_defaults():
        """Return a fresh dict holding the default settings."""
        return {"debug": False, "max_retries": 3}
# Usage: constructing Config loads the file (or creates it from the defaults);
# each set() call saves the change right away.
config = Config()
config.set("api_key", "xxx")
debug_enabled = config.get("debug")
print(debug_enabled)
🥒 Pickle - Python对象序列化¶
Python
import pickle
# The Python object to persist.
data = {
    "model": {"weights": [1, 2, 3]},
    "metadata": {"version": "1.0"},
}

# Pickle is a binary format, so the file is opened in "wb" mode.
with open("model.pkl", "wb") as f:
    pickle.dump(data, f)

# Read the object back; "rb" mirrors the binary write above.
with open("model.pkl", "rb") as f:
    loaded = pickle.load(f)

# ⚠️ Security warning: never unpickle data you do not trust!
# Use pickle only for data you produced yourself.
何时用pickle?
- 保存机器学习模型
- 临时缓存Python对象
- 进程间通信(仅限相互信任的进程之间)
何时不用pickle?
- 需要跨语言 ✅ 用JSON
- 需要可读性 ✅ 用JSON
- 数据交换 ✅ 用JSON
📊 CSV - 表格数据处理¶
Python
import csv
from pathlib import Path
# 读取CSV
def read_csv(filepath):
    """Read a UTF-8 CSV file into a list of row dicts.

    The first line of the file supplies the keys.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        One dict per data row, in file order; an empty list when the file has
        no data rows.
    """
    # newline="" lets the csv module do its own line-ending handling, so line
    # breaks inside quoted fields come back unchanged.
    with open(filepath, "r", encoding="utf-8", newline="") as f:
        return list(csv.DictReader(f))
# 写入CSV
def write_csv(data, filepath):
    """Write a list of row dicts to a UTF-8 CSV file with a header line.

    The header is the union of the keys of all rows, in first-seen order, so
    rows do not need to share exactly the same keys; a key missing from a row
    is written as an empty cell.

    Args:
        data: Sequence of dicts, one per row. When it is empty nothing is
            written and no file is created.
        filepath: Destination path; an existing file is overwritten.
    """
    if not data:
        return
    # dict.fromkeys de-duplicates while preserving first-seen order.
    fieldnames = list(dict.fromkeys(key for row in data for key in row))
    # newline="" stops the file object from translating the line endings the
    # csv writer emits.
    with open(filepath, "w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
# Usage: write two rows to disk, then read them back.
output_file = "output.csv"
data = [
    {"name": "张三", "age": 25, "city": "北京"},
    {"name": "李四", "age": 30, "city": "上海"},
]
write_csv(data, output_file)
loaded = read_csv(output_file)
💡 实用场景¶
数据导出为多种格式¶
Python
def export_data(data, filepath, format="json"):
    """Export *data* to *filepath* in the requested format.

    Args:
        data: The object to export. For ``"csv"`` it is passed straight to
            ``write_csv``.
        filepath: Destination path; an existing file is overwritten.
        format: One of ``"json"``, ``"csv"`` or ``"pickle"``. The name shadows
            the ``format`` builtin but is kept so keyword callers still work.

    Raises:
        ValueError: If *format* is not a supported format. Nothing is written
            in that case.
    """
    path = Path(filepath)
    if format == "json":
        path.write_text(
            json.dumps(data, ensure_ascii=False, indent=2),
            encoding="utf-8"
        )
    elif format == "csv":
        write_csv(data, filepath)
    elif format == "pickle":
        with open(filepath, "wb") as f:
            pickle.dump(data, f)
    else:
        # An unknown format used to fall through and silently write nothing.
        raise ValueError(f"Unsupported export format: {format!r}")
# Usage: export the same records once as JSON and once as CSV.
data = [{"name": "张三", "age": 25}]
for target, kind in (("data.json", "json"), ("data.csv", "csv")):
    export_data(data, target, kind)
📝 练习¶
练习1: JSON数据库¶
Python
import json
from pathlib import Path
class JSONDatabase:
    """Tiny key-value store persisted as a single JSON file.

    All records live in ``self.data`` (a dict) and the whole file is rewritten
    after every successful create, update or delete.
    """

    def __init__(self, filepath):
        """Open the database at *filepath*, loading existing records if any."""
        self.filepath = Path(filepath)
        self.data = self._load()

    def _load(self):
        """Return the records stored on disk, or an empty dict if no file exists."""
        if self.filepath.exists():
            return json.loads(self.filepath.read_text(encoding='utf-8'))
        return {}

    def _save(self):
        """Write all records to ``self.filepath``.

        Missing parent directories are created. The JSON is written to a
        sibling ``*.tmp`` file first and then renamed over the target, so an
        interrupted write cannot leave a truncated database behind.
        """
        payload = json.dumps(self.data, ensure_ascii=False, indent=2)
        self.filepath.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = self.filepath.with_name(self.filepath.name + ".tmp")
        tmp_path.write_text(payload, encoding='utf-8')
        tmp_path.replace(self.filepath)

    def create(self, key, value):
        """Add a new record.

        Raises:
            KeyError: If *key* already exists.
        """
        if key in self.data:
            raise KeyError(f"Key '{key}' already exists")
        self.data[key] = value
        self._save()

    def read(self, key):
        """Return the record stored under *key*, or ``None`` if it is absent."""
        return self.data.get(key)

    def update(self, key, value):
        """Replace an existing record.

        Raises:
            KeyError: If *key* does not exist.
        """
        if key not in self.data:
            raise KeyError(f"Key '{key}' not found")
        self.data[key] = value
        self._save()

    def delete(self, key):
        """Remove an existing record.

        Raises:
            KeyError: If *key* does not exist.
        """
        if key not in self.data:
            raise KeyError(f"Key '{key}' not found")
        del self.data[key]
        self._save()

    def list_all(self):
        """Return the dict of all records (the live internal dict, not a copy)."""
        return self.data
# Usage example: one full create / read / update / delete cycle.
db = JSONDatabase("mydb.json")
user_key = "user1"
db.create(user_key, {"name": "张三", "age": 25})
print(db.read(user_key))
db.update(user_key, {"name": "张三", "age": 26})
db.delete(user_key)
练习2: CSV转JSON¶
Python
import csv
import json
from pathlib import Path
def _coerce_number(value):
    """Return *value* as an int or finite float if it parses as one, else unchanged."""
    import math

    # DictReader yields None for cells missing from a short row and a list for
    # surplus cells; neither is text, so there is nothing to parse.
    if not isinstance(value, str):
        return value
    try:
        return int(value)
    except ValueError:
        pass
    try:
        number = float(value)
    except ValueError:
        return value
    # float() also accepts "nan" / "inf", which json.dumps would emit as
    # NaN / Infinity (not valid JSON); keep such cells as text instead.
    return number if math.isfinite(number) else value


def csv_to_json(csv_path, json_path):
    """Convert a UTF-8 CSV file into a JSON array of row objects.

    Cells that look like integers or finite floats are converted to numbers;
    every other cell is left as-is.

    Args:
        csv_path: Path of the CSV file to read; its first line supplies the keys.
        json_path: Path of the JSON file to write (overwritten if present).
    """
    # newline="" lets the csv module do its own line-ending handling.
    with open(csv_path, 'r', encoding='utf-8', newline='') as f:
        data = [
            {key: _coerce_number(value) for key, value in row.items()}
            for row in csv.DictReader(f)
        ]
    Path(json_path).write_text(
        json.dumps(data, ensure_ascii=False, indent=2),
        encoding='utf-8'
    )
    print(f"Converted {len(data)} records to {json_path}")
# Build a small sample CSV file, then convert it.
test_csv = "\n".join((
    "name,age,city",
    "张三,25,北京",
    "李四,30,上海",
    "王五,28,广州",
))
csv_file = Path("test.csv")
csv_file.write_text(test_csv, encoding='utf-8')
csv_to_json("test.csv", "test.json")
练习3: 配置版本管理¶
Python
import json
import shutil
from pathlib import Path
from datetime import datetime
class VersionedConfig:
    """JSON configuration that keeps a backup copy of every previous version.

    The file holds a ``version`` counter and a ``settings`` dict. Each
    ``update`` first copies the current file into the backup directory, then
    bumps the version and saves the change.
    """

    # Backup file names look like "config_v<version>_<timestamp>.json".
    _BACKUP_PREFIX = "config_v"

    def __init__(self, filepath="config.json", backup_dir="config_backups"):
        """Load the configuration and make sure the backup directory exists."""
        self.filepath = Path(filepath)
        self.backup_dir = Path(backup_dir)
        # parents=True so a nested backup directory can be created in one go.
        self.backup_dir.mkdir(parents=True, exist_ok=True)
        self.data = self._load()

    def _load(self):
        """Return the stored configuration, or a fresh one if no file exists.

        A file without the ``version`` or ``settings`` keys (for example one
        written by another tool to the same path) gets them filled in, so the
        other methods never hit a ``KeyError``.
        """
        if self.filepath.exists():
            data = json.loads(self.filepath.read_text(encoding='utf-8'))
        else:
            data = {}
        data.setdefault("version", 1)
        data.setdefault("settings", {})
        return data

    def _save(self):
        """Write the configuration to ``self.filepath``.

        Missing parent directories are created. The JSON is written to a
        sibling ``*.tmp`` file first and then renamed over the target, so an
        interrupted write cannot leave a truncated configuration behind.
        """
        payload = json.dumps(self.data, ensure_ascii=False, indent=2)
        self.filepath.parent.mkdir(parents=True, exist_ok=True)
        tmp_path = self.filepath.with_name(self.filepath.name + ".tmp")
        tmp_path.write_text(payload, encoding='utf-8')
        tmp_path.replace(self.filepath)

    def _backup(self):
        """Copy the current file into the backup directory, if the file exists."""
        if self.filepath.exists():
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            backup_path = self.backup_dir / f"config_v{self.data['version']}_{timestamp}.json"
            shutil.copy(self.filepath, backup_path)
            print(f"Backup created: {backup_path}")

    def update(self, key, value):
        """Set ``settings[key]`` to *value* as a new version.

        The previous file is backed up first; then the version counter is
        incremented, ``last_updated`` is refreshed and the file is saved.
        """
        self._backup()
        self.data['version'] += 1
        self.data['settings'][key] = value
        self.data['last_updated'] = datetime.now().isoformat()
        self._save()
        print(f"Updated to version {self.data['version']}")

    def get(self, key, default=None):
        """Return ``settings[key]``, or *default* if the key is absent."""
        return self.data['settings'].get(key, default)

    @classmethod
    def _version_of(cls, path):
        """Return the version number encoded in a backup file name, or -1."""
        token = path.name[len(cls._BACKUP_PREFIX):].split("_", 1)[0]
        try:
            return int(token)
        except ValueError:
            return -1

    def list_versions(self):
        """Return the backup file names, oldest version first.

        Sorting is by numeric version (then by name), so ``v10`` comes after
        ``v2`` rather than between ``v1`` and ``v2``.
        """
        backups = sorted(
            self.backup_dir.glob("config_v*.json"),
            key=lambda p: (self._version_of(p), p.name),
        )
        return [b.name for b in backups]
# Usage: apply two changes, then show the backup files that were created.
config = VersionedConfig()
for setting, new_value in (("theme", "dark"), ("language", "zh-CN")):
    config.update(setting, new_value)
print(config.list_versions())
🎯 自我检查¶
- 掌握JSON的基本读写操作
- 能处理JSON中的日期时间等特殊类型
- 理解pickle的使用场景和安全性问题
- 能读写CSV文件
- 能实现简单的配置管理功能
📚 延伸阅读¶
下一步: 03 - 日期时间处理