Files
aiagent/backend/app/services/data_transformer.py

256 lines
8.9 KiB
Python
Raw Normal View History

2026-01-19 00:09:36 +08:00
"""
Data transformation service.
Supports field mapping, data filtering, data conversion, and related features.
"""
from typing import Dict, Any, List, Optional, Union
import json
import re
class DataTransformer:
    """Stateless helpers for reshaping dictionary data.

    Every method is a ``@staticmethod``; the class only serves as a namespace
    for path-based access (``get_nested_value`` / ``set_nested_value``) and the
    three transformation stages (mapping, filter, compute) that
    ``transform_data`` chains together.
    """

    # A path segment of the form ``name[0]`` or ``name[0][1]`` (name may be empty).
    _INDEXED_SEGMENT = re.compile(r'([^\[\]]*)((?:\[\d+\])+)')
    # A single ``[n]`` index inside such a segment.
    _INDEX = re.compile(r'\[(\d+)\]')

    # Comparison predicates understood by ``transform_filter``.
    _FILTER_OPERATORS = {
        '==': lambda left, right: left == right,
        '!=': lambda left, right: left != right,
        '>': lambda left, right: left > right,
        '>=': lambda left, right: left >= right,
        '<': lambda left, right: left < right,
        '<=': lambda left, right: left <= right,
        'in': lambda left, right: left in right,
        'not in': lambda left, right: left not in right,
    }

    @staticmethod
    def _child(container: Any, key: str) -> Any:
        """Return ``container[key]`` for a dict, or the list item for a numeric key."""
        if isinstance(container, dict):
            return container.get(key)
        # isdecimal() (unlike isdigit()) only accepts characters int() can parse.
        if isinstance(container, list) and key.isdecimal():
            return DataTransformer._item(container, int(key))
        return None

    @staticmethod
    def _item(container: Any, index: int) -> Any:
        """Return ``container[index]`` if *container* is a list long enough, else None."""
        if isinstance(container, list) and index < len(container):
            return container[index]
        return None

    @staticmethod
    def get_nested_value(data: Dict[str, Any], path: str) -> Any:
        """
        Read a value from a nested structure of dicts and lists.

        Args:
            data: Root dictionary.
            path: Dot-separated path. A segment may carry one or more list
                indices (``'user.name'``, ``'items[0].price'``,
                ``'order.items[0].price'``, ``'matrix[1][0]'``); a purely
                numeric segment also indexes a list (``'items.0.price'``).

        Returns:
            The value found at *path*, or None when any step is missing, out
            of range, of the wrong container type, or itself None.
        """
        if not isinstance(path, str):
            return None

        value: Any = data
        for segment in path.split('.'):
            if isinstance(value, dict) and segment in value:
                # A key that literally equals the segment always wins, so odd
                # keys such as 'a[b]' remain reachable.
                value = value[segment]
            else:
                indexed = DataTransformer._INDEXED_SEGMENT.fullmatch(segment)
                if indexed is None:
                    value = DataTransformer._child(value, segment)
                else:
                    name, brackets = indexed.groups()
                    if name:
                        value = DataTransformer._child(value, name)
                    # Apply every index in order so 'm[1][0]' drills two levels.
                    for index in DataTransformer._INDEX.findall(brackets):
                        value = DataTransformer._item(value, int(index))
            if value is None:
                return None
        return value

    @staticmethod
    def set_nested_value(data: Dict[str, Any], path: str, value: Any) -> None:
        """
        Write a value into a nested dictionary, creating intermediate dicts.

        Only dot-separated keys are interpreted; square brackets are NOT
        parsed here, so ``'items[0]'`` is stored under that literal key.

        Args:
            data: Dictionary to modify in place.
            path: Dot-separated path, e.g. ``'user.name'``.
            value: Value to store at the final key.

        Raises:
            TypeError: If an intermediate key already holds a non-dict value,
                so the path cannot be descended.
        """
        *parents, leaf = path.split('.')
        current = data
        for key in parents:
            child = current.setdefault(key, {})
            if not isinstance(child, dict):
                raise TypeError(
                    f"cannot set '{path}': '{key}' holds a "
                    f"{type(child).__name__}, not a dict"
                )
            current = child
        current[leaf] = value

    @staticmethod
    def transform_mapping(input_data: Dict[str, Any], mapping: Dict[str, str]) -> Dict[str, Any]:
        """
        Build a new dictionary by copying values from source paths to target paths.

        Args:
            input_data: Source data.
            mapping: ``{"target_key": "source_path"}``. Source paths follow
                ``get_nested_value`` syntax; target keys follow
                ``set_nested_value`` syntax (dots nest, brackets are literal).

        Returns:
            A new dictionary containing only the mapped fields. Sources that
            resolve to None are omitted.
        """
        result: Dict[str, Any] = {}
        for target_key, source_key in mapping.items():
            value = DataTransformer.get_nested_value(input_data, source_key)
            if value is not None:
                # For a key without dots this is a plain ``result[key] = value``.
                DataTransformer.set_nested_value(result, target_key, value)
        return result

    @staticmethod
    def transform_filter(input_data: Dict[str, Any], filter_rules: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Keep only the fields whose rule evaluates to true.

        Args:
            input_data: Source data.
            filter_rules: Rules of the form
                ``{"field": "path", "operator": ">", "value": 10}``. Supported
                operators: ``==``, ``!=``, ``>``, ``>=``, ``<``, ``<=``,
                ``in``, ``not in`` (default ``==``). Rules without a field or
                with an unknown operator are ignored.

        Returns:
            A dictionary holding the fields whose rule matched. When no rule
            matched at all, *input_data* itself is returned unchanged.
        """
        result: Dict[str, Any] = {}
        for rule in filter_rules:
            field = rule.get('field')
            if not field:
                continue
            predicate = DataTransformer._FILTER_OPERATORS.get(rule.get('operator', '=='))
            if predicate is None:
                continue

            field_value = DataTransformer.get_nested_value(input_data, field)
            try:
                matched = bool(predicate(field_value, rule.get('value')))
            except TypeError:
                # Operands that cannot be compared (e.g. a missing field that
                # resolved to None versus a number, or a non-container operand
                # for 'in') simply do not match instead of aborting the filter.
                matched = False
            if not matched:
                continue

            if field in input_data:
                result[field] = input_data[field]
            else:
                # Nested field: rebuild the surrounding structure in the result.
                DataTransformer.set_nested_value(result, field, field_value)
        # NOTE: falling back to the full input when nothing matched is the
        # established behaviour of this method and is preserved here.
        return result if result else input_data

    @staticmethod
    def transform_compute(input_data: Dict[str, Any], compute_rules: Dict[str, str]) -> Dict[str, Any]:
        """
        Add computed fields to a copy of the input.

        Args:
            input_data: Source data. Top-level keys can be referenced in an
                expression either as ``{key}`` placeholders (textually
                substituted; dicts and lists are inserted as JSON) or, for
                scalar values, directly by name.
            compute_rules: ``{"target_key": "expression"}``, e.g.
                ``{"total": "{a} + {b}"}``. Available functions: ``abs``,
                ``min``, ``max``, ``sum``, ``len``.

        Returns:
            A shallow copy of *input_data* with one extra key per rule. A rule
            whose expression fails to evaluate yields None for its key.
        """
        result = input_data.copy()

        # The evaluation namespace does not depend on the rule, so build it once.
        namespace: Dict[str, Any] = {
            '__builtins__': {},
            'abs': abs,
            'min': min,
            'max': max,
            'sum': sum,
            'len': len,
            # Dicts/lists are substituted as JSON text; define the JSON
            # literals so that text is also a valid expression.
            'true': True,
            'false': False,
            'null': None,
            'NaN': float('nan'),
            'Infinity': float('inf'),
        }
        for key, value in input_data.items():
            # Never let input data replace the (empty) builtins guard.
            if key == '__builtins__':
                continue
            if isinstance(value, (str, int, float, bool, type(None))):
                namespace[key] = value

        for target_key, expression in compute_rules.items():
            try:
                computed_expression = expression
                for key, value in input_data.items():
                    placeholder = f'{{{key}}}'
                    if placeholder not in computed_expression:
                        continue
                    if isinstance(value, (dict, list)):
                        replacement = json.dumps(value, ensure_ascii=False)
                    else:
                        replacement = str(value)
                    computed_expression = computed_expression.replace(placeholder, replacement)
                # SECURITY: eval() with empty builtins is NOT a sandbox (object
                # introspection can still escape it). Expressions and
                # substituted values must come from trusted configuration only.
                # A fresh copy keeps one expression from altering the
                # namespace seen by the next.
                result[target_key] = eval(computed_expression, dict(namespace))
            except Exception:
                # Best effort: a failing expression must not abort the others.
                result[target_key] = None
        return result

    @staticmethod
    def transform_data(
        input_data: Dict[str, Any],
        mapping: Optional[Dict[str, str]] = None,
        filter_rules: Optional[List[Dict[str, Any]]] = None,
        compute_rules: Optional[Dict[str, str]] = None,
        mode: str = 'mapping'
    ) -> Dict[str, Any]:
        """
        Run one or all transformation stages on the input.

        Args:
            input_data: Source data (not modified at the top level; a shallow
                copy is taken first).
            mapping: Rules for ``transform_mapping``.
            filter_rules: Rules for ``transform_filter``.
            compute_rules: Rules for ``transform_compute``.
            mode: ``'mapping'``, ``'filter'``, ``'compute'``, or ``'all'``
                (mapping, then filter, then compute). A stage whose rules are
                empty or None is skipped; any other mode runs no stage.

        Returns:
            The transformed data.
        """
        result = input_data.copy()
        run_all = mode == 'all'
        if mapping and (run_all or mode == 'mapping'):
            result = DataTransformer.transform_mapping(result, mapping)
        if filter_rules and (run_all or mode == 'filter'):
            result = DataTransformer.transform_filter(result, filter_rules)
        if compute_rules and (run_all or mode == 'compute'):
            result = DataTransformer.transform_compute(result, compute_rules)
        return result
# Module-level shared instance (every DataTransformer method is a staticmethod,
# so the instance itself carries no state).
data_transformer = DataTransformer()