Files
aiagent/backend/app/services/data_transformer.py
2026-01-19 00:09:36 +08:00

256 lines
8.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
数据转换服务
支持字段映射、数据过滤、数据转换等功能
"""
import json
import logging
import re
from typing import Dict, Any, List, Optional, Union
class DataTransformer:
    """Stateless helpers for reshaping dictionaries.

    Provides nested-path lookup and assignment, field mapping, rule-based
    field filtering and expression-based computed fields. Every public
    method is a ``staticmethod``; the class holds no instance state.
    """

    # A dot-separated path segment of the form ``key[0]`` or ``key[0][1]``:
    # group 1 is the key, group 2 the run of numeric subscripts.
    _INDEXED_SEGMENT = re.compile(r'([^\[\]]+)((?:\[\d+\])+)')
    _SUBSCRIPT = re.compile(r'\[(\d+)\]')

    # Comparison used for each supported filter operator.
    _FILTER_OPERATORS = {
        '==': lambda left, right: left == right,
        '!=': lambda left, right: left != right,
        '>': lambda left, right: left > right,
        '>=': lambda left, right: left >= right,
        '<': lambda left, right: left < right,
        '<=': lambda left, right: left <= right,
        'in': lambda left, right: left in right,
        'not in': lambda left, right: left not in right,
    }

    _logger = logging.getLogger(__name__)

    @staticmethod
    def _descend(container: Any, key: str) -> Any:
        """Return the child of *container* named *key*, or None if absent.

        Dicts are looked up by key; lists accept a purely numeric key as an
        index (so ``items.0`` works); anything else yields None.
        """
        if isinstance(container, dict):
            return container.get(key)
        if isinstance(container, list) and key.isdecimal():
            index = int(key)
            return container[index] if index < len(container) else None
        return None

    @staticmethod
    def get_nested_value(data: Dict[str, Any], path: str) -> Any:
        """Look up a value in nested dicts/lists by path.

        Args:
            data: The structure to read from.
            path: Dot-separated path. A segment may carry one or more numeric
                subscripts, e.g. ``'user.name'``, ``'items[0].price'``,
                ``'user.items[0].price'`` or ``'matrix[1][0]'``. A purely
                numeric segment indexes a list (``'items.0.price'``).

        Returns:
            The value found, or None when any step is missing, out of range,
            of the wrong container type, or itself None.
        """
        if not isinstance(path, str):
            return None

        value: Any = data
        # Walk every segment in order so that keys *before* a subscripted
        # segment and subscripts chained after it are both honoured.
        for segment in path.split('.'):
            indexed = DataTransformer._INDEXED_SEGMENT.fullmatch(segment)
            if indexed:
                key = indexed.group(1)
                subscripts = [
                    int(number)
                    for number in DataTransformer._SUBSCRIPT.findall(indexed.group(2))
                ]
            else:
                # No well-formed subscript: treat the segment as a plain key.
                key, subscripts = segment, []

            value = DataTransformer._descend(value, key)
            if value is None:
                return None

            for index in subscripts:
                if not isinstance(value, list) or index >= len(value):
                    return None
                value = value[index]
                if value is None:
                    return None
        return value

    @staticmethod
    def set_nested_value(data: Dict[str, Any], path: str, value: Any) -> None:
        """Assign *value* at a dot-separated *path*, creating dicts as needed.

        Only ``.`` is interpreted; a segment such as ``items[0]`` is used
        verbatim as a dictionary key. If an intermediate key already holds a
        non-dict value it is replaced by a new dict so the assignment can
        proceed instead of raising ``TypeError``.

        Args:
            data: Dictionary to modify in place.
            path: Dot-separated target path, e.g. ``'user.name'``.
            value: Value to store.
        """
        *parents, leaf = path.split('.')
        current = data
        for key in parents:
            child = current.get(key)
            if not isinstance(child, dict):
                child = {}
                current[key] = child
            current = child
        current[leaf] = value

    @staticmethod
    def transform_mapping(input_data: Dict[str, Any], mapping: Dict[str, str]) -> Dict[str, Any]:
        """Build a new dict by copying values from source paths to target keys.

        Args:
            input_data: Source data.
            mapping: ``{"target_key": "source_path"}``. Source paths follow
                ``get_nested_value``; dotted target keys create nested dicts
                via ``set_nested_value``.

        Returns:
            A new dict holding every mapped value that resolved to something
            other than None. Unresolved sources are skipped.
        """
        result: Dict[str, Any] = {}
        for target_key, source_key in mapping.items():
            value = DataTransformer.get_nested_value(input_data, source_key)
            if value is None:
                continue
            # set_nested_value only splits on '.', so a target without a dot
            # (with or without brackets) is a plain top-level key.
            if isinstance(target_key, str) and '.' in target_key:
                DataTransformer.set_nested_value(result, target_key, value)
            else:
                result[target_key] = value
        return result

    @staticmethod
    def transform_filter(input_data: Dict[str, Any], filter_rules: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Keep only the fields whose filter rule is satisfied.

        Args:
            input_data: Source data.
            filter_rules: Rules of the form
                ``{"field": "key", "operator": ">", "value": 10}``. Supported
                operators: ``==`` (default), ``!=``, ``>``, ``>=``, ``<``,
                ``<=``, ``in``, ``not in``. Rules without a field or with an
                unknown operator are ignored.

        Returns:
            A dict containing each field whose rule matched. NOTE: when no
            rule matches (or no rules are given) the original ``input_data``
            object is returned unchanged, not an empty dict.
        """
        result: Dict[str, Any] = {}
        for rule in filter_rules:
            field = rule.get('field')
            if not field:
                continue

            operator = rule.get('operator', '==')
            compare = (
                DataTransformer._FILTER_OPERATORS.get(operator)
                if isinstance(operator, str) else None
            )
            if compare is None:
                continue

            field_value = DataTransformer.get_nested_value(input_data, field)
            try:
                matched = bool(compare(field_value, rule.get('value')))
            except TypeError:
                # Incomparable operands (e.g. a missing field compared with a
                # number, or membership in a non-container) simply don't match.
                matched = False
            if not matched:
                continue

            if field in input_data:
                result[field] = input_data[field]
            else:
                # Nested field: rebuild the enclosing structure in the result.
                DataTransformer.set_nested_value(result, field, field_value)
        return result if result else input_data

    @staticmethod
    def transform_compute(input_data: Dict[str, Any], compute_rules: Dict[str, str]) -> Dict[str, Any]:
        """Add computed fields by evaluating expressions over the input.

        ``{key}`` placeholders are textually replaced by the top-level input
        value (``str(value)`` for scalars, JSON for dicts/lists) and the
        resulting text is evaluated as a Python expression. Scalar top-level
        inputs are also available by name, together with ``abs``, ``min``,
        ``max``, ``sum``, ``len`` and the JSON literals ``true``, ``false``,
        ``null``.

        Args:
            input_data: Source data.
            compute_rules: ``{"result": "{a} + {b}"}``.

        Returns:
            A shallow copy of ``input_data`` with one extra key per rule. A
            rule that fails to evaluate yields None and logs a warning.
        """
        result = input_data.copy()

        # The namespace depends only on input_data, so build it once.
        namespace: Dict[Any, Any] = {
            '__builtins__': {},
            'abs': abs,
            'min': min,
            'max': max,
            'sum': sum,
            'len': len,
            # json.dumps emits these spellings for nested booleans/None.
            'true': True,
            'false': False,
            'null': None,
        }
        for key, value in input_data.items():
            if key == '__builtins__':
                # Never let input data replace the emptied builtins.
                continue
            if isinstance(value, (str, int, float, bool, type(None))):
                namespace[key] = value

        for target_key, expression in compute_rules.items():
            try:
                computed_expression = expression
                for key, value in input_data.items():
                    placeholder = f'{{{key}}}'
                    if placeholder not in computed_expression:
                        continue
                    if isinstance(value, (dict, list)):
                        replacement = json.dumps(value, ensure_ascii=False)
                    else:
                        replacement = str(value)
                    computed_expression = computed_expression.replace(placeholder, replacement)

                # SECURITY NOTE: eval() with empty __builtins__ is NOT a
                # sandbox; attribute access on literals can still reach
                # arbitrary objects. Expressions (and substituted values) must
                # come from a trusted source. TODO(review): replace with an
                # ast-based whitelist evaluator.
                # A fresh copy per rule keeps one expression from leaking
                # names into the next.
                result[target_key] = eval(computed_expression, dict(namespace))
            except Exception as exc:
                # Best effort: a broken rule must not abort the others.
                DataTransformer._logger.warning(
                    "Failed to evaluate compute rule %r (%r): %s",
                    target_key, expression, exc,
                )
                result[target_key] = None
        return result

    @staticmethod
    def transform_data(
        input_data: Dict[str, Any],
        mapping: Optional[Dict[str, str]] = None,
        filter_rules: Optional[List[Dict[str, Any]]] = None,
        compute_rules: Optional[Dict[str, str]] = None,
        mode: str = 'mapping'
    ) -> Dict[str, Any]:
        """Run one or all of the transformation stages.

        Args:
            input_data: Source data.
            mapping: Rules for ``transform_mapping``.
            filter_rules: Rules for ``transform_filter``.
            compute_rules: Rules for ``transform_compute``.
            mode: ``'mapping'``, ``'filter'``, ``'compute'`` or ``'all'``.
                With ``'all'`` the stages run in that order, each on the
                previous stage's output. A stage whose rules are empty or
                None is skipped; any other mode leaves the data unchanged.

        Returns:
            The transformed data (a shallow copy of the input if no stage ran).
        """
        result = input_data.copy()
        if mode in ('mapping', 'all') and mapping:
            result = DataTransformer.transform_mapping(result, mapping)
        if mode in ('filter', 'all') and filter_rules:
            result = DataTransformer.transform_filter(result, filter_rules)
        if mode in ('compute', 'all') and compute_rules:
            result = DataTransformer.transform_compute(result, compute_rules)
        return result
# Global instance: a module-level DataTransformer shared by importers.
data_transformer = DataTransformer()