256 lines
8.9 KiB
Python
256 lines
8.9 KiB
Python
|
|
"""
|
|||
|
|
数据转换服务
|
|||
|
|
支持字段映射、数据过滤、数据转换等功能
|
|||
|
|
"""
|
|||
|
|
from typing import Dict, Any, List, Optional, Union
|
|||
|
|
import json
|
|||
|
|
import re
|
|||
|
|
|
|||
|
|
|
|||
|
|
class DataTransformer:
    """Stateless helpers for reshaping dictionary-based records.

    Every method is a ``@staticmethod``; the class only serves as a namespace.
    Supported operations are path-based lookup/assignment, field mapping,
    rule-based field filtering, and expression-based computed fields.
    """

    # One dot-separated path segment that carries list indexes,
    # e.g. ``items[0]`` or ``grid[1][2]``: an optional key followed by one or
    # more ``[digits]`` groups.
    _INDEXED_SEGMENT = re.compile(r'([^\[\]]*)((?:\[\d+\])+)')
    _INDEX = re.compile(r'\[(\d+)\]')

    # Filter operators keyed by the string used in a rule's ``operator`` entry.
    _FILTER_OPERATORS = {
        '==': lambda left, right: left == right,
        '!=': lambda left, right: left != right,
        '>': lambda left, right: left > right,
        '>=': lambda left, right: left >= right,
        '<': lambda left, right: left < right,
        '<=': lambda left, right: left <= right,
        'in': lambda left, right: left in right,
        'not in': lambda left, right: left not in right,
    }

    @staticmethod
    def _descend(container: Any, key: str) -> Any:
        """Take one plain (non-bracketed) step into ``container``.

        Dicts are read with ``get``; lists accept an all-digit key as an
        index. Anything else, or an out-of-range index, yields ``None``.
        """
        if isinstance(container, dict):
            return container.get(key)
        if isinstance(container, list) and key.isdigit():
            position = int(key)
            return container[position] if position < len(container) else None
        return None

    @staticmethod
    def get_nested_value(data: Dict[str, Any], path: str) -> Any:
        """Read a value from nested dicts/lists using a path expression.

        The path is split on ``.``. Each segment is either a plain key
        (``user``), a numeric list index (``0``), or a key followed by one or
        more bracketed indexes (``items[0]``, ``grid[1][2]``). Bracketed
        segments may appear at any depth, e.g. ``user.orders[1].price``.

        Args:
            data: The structure to read from.
            path: Path such as ``'user.name'`` or ``'items[0].price'``.

        Returns:
            The value found, or ``None`` when any step is missing, out of
            range, or applied to an incompatible type.
        """
        try:
            current: Any = data
            for segment in path.split('.'):
                indexed = DataTransformer._INDEXED_SEGMENT.fullmatch(segment)
                if indexed is None:
                    current = DataTransformer._descend(current, segment)
                else:
                    name = indexed.group(1)
                    if name:
                        current = DataTransformer._descend(current, name)
                    # Apply every bracketed index in order; only real lists
                    # can be indexed this way.
                    for raw_index in DataTransformer._INDEX.findall(indexed.group(2)):
                        if not isinstance(current, list):
                            return None
                        position = int(raw_index)
                        if position >= len(current):
                            return None
                        current = current[position]
                if current is None:
                    return None
            return current
        except (KeyError, IndexError, TypeError, AttributeError, ValueError):
            # Malformed paths or non-string paths resolve to "not found".
            return None

    @staticmethod
    def set_nested_value(data: Dict[str, Any], path: str, value: Any) -> None:
        """Assign ``value`` inside ``data`` at a dot-separated path.

        Missing intermediate dicts are created. If an intermediate key already
        holds something that is not a dict, it is replaced by a new dict so
        that the assignment can proceed (the later write wins) instead of
        raising ``TypeError``.

        Brackets are NOT interpreted here: ``'items[0]'`` is used verbatim as
        a dictionary key.

        Args:
            data: The dict to modify in place.
            path: Dot-separated path such as ``'user.name'``.
            value: The value to store.
        """
        *parents, leaf = path.split('.')
        current = data
        for key in parents:
            child = current.get(key)
            if not isinstance(child, dict):
                child = {}
                current[key] = child
            current = child
        current[leaf] = value

    @staticmethod
    def transform_mapping(input_data: Dict[str, Any], mapping: Dict[str, str]) -> Dict[str, Any]:
        """Build a new dict by copying values from source paths to target paths.

        Args:
            input_data: The source record.
            mapping: Rules in the form ``{"target_key": "source_key"}``.
                Source keys are resolved with ``get_nested_value``; target
                keys are written with ``set_nested_value`` (dots create
                nested dicts).

        Returns:
            The mapped dict. Targets whose source resolves to ``None`` are
            omitted.
        """
        result: Dict[str, Any] = {}
        for target_key, source_key in mapping.items():
            value = DataTransformer.get_nested_value(input_data, source_key)
            if value is None:
                continue
            # For a key without dots this is a plain ``result[key] = value``.
            DataTransformer.set_nested_value(result, target_key, value)
        return result

    @staticmethod
    def transform_filter(input_data: Dict[str, Any], filter_rules: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Keep only the fields whose filter rule is satisfied.

        Args:
            input_data: The source record.
            filter_rules: Rules in the form
                ``[{"field": "key", "operator": ">", "value": 10}]``.
                ``operator`` defaults to ``'=='``. Supported operators are
                ``==, !=, >, >=, <, <=, in, not in``. Rules without a field,
                with an unknown operator, or whose comparison is not possible
                for the given types (for example a missing field compared
                with ``>``) do not match.

        Returns:
            A dict containing the fields of all matching rules. NOTE: when no
            rule matches, ``input_data`` itself is returned unchanged.
        """
        result: Dict[str, Any] = {}

        for rule in filter_rules:
            field = rule.get('field')
            if not field:
                continue

            field_value = DataTransformer.get_nested_value(input_data, field)

            try:
                compare = DataTransformer._FILTER_OPERATORS.get(rule.get('operator', '=='))
                matched = compare is not None and bool(compare(field_value, rule.get('value')))
            except TypeError:
                # Incomparable operands (e.g. None > 10, x in None): no match.
                matched = False

            if not matched:
                continue

            if field in input_data:
                result[field] = input_data[field]
            else:
                # Nested field: rebuild the enclosing structure in the result.
                DataTransformer.set_nested_value(result, field, field_value)

        return result if result else input_data

    @staticmethod
    def transform_compute(input_data: Dict[str, Any], compute_rules: Dict[str, str]) -> Dict[str, Any]:
        """Add computed fields by evaluating expressions over the input.

        ``{key}`` placeholders are replaced textually with the matching
        top-level input value (dicts/lists as JSON, everything else via
        ``str``, so string values are inserted unquoted). Scalar top-level
        values are also available as variables, together with ``abs``,
        ``min``, ``max``, ``sum`` and ``len``.

        SECURITY: expressions are executed with ``eval``. Emptying
        ``__builtins__`` is not a real sandbox; only pass expressions from
        trusted sources.

        Args:
            input_data: The source record.
            compute_rules: Rules in the form ``{"result": "{a} + {b}"}``.

        Returns:
            A shallow copy of ``input_data`` with one entry per rule. A rule
            whose expression fails for any reason is stored as ``None``.
        """
        result = input_data.copy()

        # Namespace shared by all rules; copied per rule so that one
        # expression cannot affect the next.
        base_namespace: Dict[Any, Any] = {
            '__builtins__': {},
            'abs': abs,
            'min': min,
            'max': max,
            'sum': sum,
            'len': len,
        }
        for key, value in input_data.items():
            if key == '__builtins__':
                # Never let input data replace the stripped builtins entry.
                continue
            if isinstance(value, (str, int, float, bool, type(None))):
                base_namespace[key] = value

        for target_key, expression in compute_rules.items():
            try:
                computed_expression = expression
                for key, value in input_data.items():
                    placeholder = f'{{{key}}}'
                    if placeholder not in computed_expression:
                        continue
                    if isinstance(value, (dict, list)):
                        replacement = json.dumps(value, ensure_ascii=False)
                    else:
                        replacement = str(value)
                    computed_expression = computed_expression.replace(placeholder, replacement)

                # NOTE(security): eval on rule text; see the docstring.
                result[target_key] = eval(computed_expression, dict(base_namespace))
            except Exception:
                # Best effort: a failing rule yields None instead of aborting.
                result[target_key] = None

        return result

    @staticmethod
    def transform_data(
        input_data: Dict[str, Any],
        mapping: Optional[Dict[str, str]] = None,
        filter_rules: Optional[List[Dict[str, Any]]] = None,
        compute_rules: Optional[Dict[str, str]] = None,
        mode: str = 'mapping'
    ) -> Dict[str, Any]:
        """Run one or all transformation stages on a record.

        Stages run in the fixed order mapping -> filter -> compute, each one
        receiving the previous stage's output. A stage runs only when it is
        selected by ``mode`` and its rules are non-empty.

        Args:
            input_data: The source record.
            mapping: Field mapping rules.
            filter_rules: Filter rules.
            compute_rules: Compute rules.
            mode: ``'mapping'``, ``'filter'``, ``'compute'`` or ``'all'``.
                Any other value runs no stage.

        Returns:
            The transformed record; a shallow copy of ``input_data`` when no
            stage ran.
        """
        result = input_data.copy()
        run_all = mode == 'all'

        if mapping and (run_all or mode == 'mapping'):
            result = DataTransformer.transform_mapping(result, mapping)

        if filter_rules and (run_all or mode == 'filter'):
            result = DataTransformer.transform_filter(result, filter_rules)

        if compute_rules and (run_all or mode == 'compute'):
            result = DataTransformer.transform_compute(result, compute_rules)

        return result
|
|||
|
|
|
|||
|
|
|
|||
|
|
# 全局实例
|
|||
|
|
# Shared module-level instance. Every DataTransformer method is a
# staticmethod, so the instance carries no state of its own; presumably it is
# kept so callers can import an object instead of the class (verify usage).
data_transformer = DataTransformer()
|