# backend/app/services/data_transformer.py

"""
数据转换服务
支持字段映射、数据过滤、数据转换等功能
"""
from typing import Dict, Any, List, Optional, Union
import json
import re


class DataTransformer:
    """Stateless helpers for reshaping dictionary payloads.

    Provides field mapping, rule-based filtering and expression-based
    computation. Every method is a ``staticmethod``; the class only serves
    as a namespace.
    """

    # One dotted-path segment that ends in list indices, e.g. 'items[0]',
    # 'grid[1][2]' or a bare '[3]'.
    _INDEXED_SEGMENT = re.compile(r'^([^\[\]]*)((?:\[\d+\])+)$')
    # A single '[n]' index inside such a segment.
    _INDEX = re.compile(r'\[(\d+)\]')

    # Filter operators understood by transform_filter.
    _COMPARATORS: Dict[str, Any] = {
        '==': lambda left, right: left == right,
        '!=': lambda left, right: left != right,
        '>': lambda left, right: left > right,
        '>=': lambda left, right: left >= right,
        '<': lambda left, right: left < right,
        '<=': lambda left, right: left <= right,
        'in': lambda left, right: left in right,
        'not in': lambda left, right: left not in right,
    }

    @staticmethod
    def _parse_path(path: str) -> List[Union[str, int]]:
        """Split a path into dict keys (str) and explicit list indices (int)."""
        tokens: List[Union[str, int]] = []
        for segment in path.split('.'):
            indexed = DataTransformer._INDEXED_SEGMENT.match(segment)
            if indexed is None:
                # No well-formed '[n]' suffix: treat the segment as a literal key.
                tokens.append(segment)
                continue
            name, indices = indexed.groups()
            if name:
                tokens.append(name)
            tokens.extend(int(number) for number in DataTransformer._INDEX.findall(indices))
        return tokens

    @staticmethod
    def get_nested_value(data: Dict[str, Any], path: str) -> Any:
        """
        Read a value from a nested dict/list structure.

        Args:
            data: Source structure.
            path: Dotted path. List items may be addressed with brackets
                ('items[0].price', 'user.items[1]', 'grid[1][0]') or with a
                numeric dotted segment ('items.0.price').

        Returns:
            The value found at ``path``, or None when any step is missing,
            out of range, or of the wrong container type.
        """
        try:
            current: Any = data
            for token in DataTransformer._parse_path(path):
                if isinstance(token, int):
                    # Bracket index: only valid on a list and within bounds.
                    if not isinstance(current, list) or token >= len(current):
                        return None
                    current = current[token]
                elif isinstance(current, dict):
                    current = current.get(token)
                elif isinstance(current, list) and token.isdecimal():
                    # isdecimal() (unlike isdigit()) only accepts characters
                    # that int() can actually convert.
                    position = int(token)
                    current = current[position] if position < len(current) else None
                else:
                    return None
                if current is None:
                    return None
            return current
        except (KeyError, IndexError, TypeError, AttributeError):
            return None

    @staticmethod
    def set_nested_value(data: Dict[str, Any], path: str, value: Any) -> None:
        """
        Write a value into a nested dict, creating intermediate dicts.

        Bracket syntax is NOT interpreted here: a segment such as 'items[0]'
        is stored under that literal key.

        Args:
            data: Dict to modify in place.
            path: Dotted path, e.g. 'user.name'.
            value: Value to store at the final key.

        Raises:
            TypeError: If an intermediate key already holds a non-dict value,
                so the path cannot be descended.
        """
        *parents, leaf = path.split('.')
        current = data
        for key in parents:
            if key not in current:
                current[key] = {}
            child = current[key]
            if not isinstance(child, dict):
                raise TypeError(
                    f"cannot set '{path}': '{key}' already holds a non-dict value"
                )
            current = child
        current[leaf] = value

    @staticmethod
    def transform_mapping(input_data: Dict[str, Any], mapping: Dict[str, str]) -> Dict[str, Any]:
        """
        Build a new dict by copying fields according to a mapping.

        Args:
            input_data: Source data.
            mapping: Rules of the form {"target_key": "source_path"}. Source
                paths follow get_nested_value; dotted target keys create
                nested dicts via set_nested_value.

        Returns:
            A new dict holding every target whose source value is not None.
        """
        result: Dict[str, Any] = {}
        for target_key, source_key in mapping.items():
            value = DataTransformer.get_nested_value(input_data, source_key)
            if value is None:
                continue
            # For a key without dots this is a plain result[target_key] = value.
            DataTransformer.set_nested_value(result, target_key, value)
        return result

    @staticmethod
    def transform_filter(input_data: Dict[str, Any], filter_rules: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Keep only the fields whose filter rule matches.

        Args:
            input_data: Source data.
            filter_rules: Rules of the form
                [{"field": "key", "operator": ">", "value": 10}]. Supported
                operators: ==, !=, >, >=, <, <=, in, not in. The operator
                defaults to '=='; rules with no field or an unknown operator
                are ignored.

        Returns:
            A dict with the matching fields. When no rule matches, the
            original ``input_data`` object is returned unchanged.
        """
        result: Dict[str, Any] = {}

        for rule in filter_rules:
            field = rule.get('field')
            if not field:
                continue

            operator = rule.get('operator', '==')
            compare = (
                DataTransformer._COMPARATORS.get(operator)
                if isinstance(operator, str)
                else None
            )
            if compare is None:
                continue

            field_value = DataTransformer.get_nested_value(input_data, field)
            try:
                matched = bool(compare(field_value, rule.get('value')))
            except TypeError:
                # Operands cannot be compared (e.g. the field is missing, so
                # None is compared with a number): the rule does not match.
                matched = False
            if not matched:
                continue

            if field in input_data:
                result[field] = input_data[field]
            else:
                # Nested field: rebuild the enclosing structure in the result.
                DataTransformer.set_nested_value(result, field, field_value)

        return result if result else input_data

    @staticmethod
    def transform_compute(input_data: Dict[str, Any], compute_rules: Dict[str, str]) -> Dict[str, Any]:
        """
        Add computed fields evaluated from expressions.

        Args:
            input_data: Source data.
            compute_rules: Rules of the form {"result": "{a} + {b}"}.
                '{key}' placeholders are replaced textually with the matching
                top-level value (dicts/lists as JSON, everything else via
                str()). Top-level scalar values are also available as plain
                names, together with abs, min, max, sum and len.

        Returns:
            A shallow copy of ``input_data`` with one extra key per rule. A
            rule whose expression fails to evaluate yields None.
        """
        result = input_data.copy()

        base_namespace = {
            '__builtins__': {},
            'abs': abs,
            'min': min,
            'max': max,
            'sum': sum,
            'len': len,
            # Dict/list placeholders are rendered as JSON, so let the JSON
            # constants evaluate instead of raising NameError.
            'true': True,
            'false': False,
            'null': None,
        }
        scalar_values = {
            key: value
            for key, value in input_data.items()
            if isinstance(value, (str, int, float, bool, type(None)))
        }

        for target_key, expression in compute_rules.items():
            try:
                rendered = expression
                for key, value in input_data.items():
                    placeholder = f'{{{key}}}'
                    if placeholder not in rendered:
                        continue
                    if isinstance(value, (dict, list)):
                        replacement = json.dumps(value, ensure_ascii=False)
                    else:
                        replacement = str(value)
                    rendered = rendered.replace(placeholder, replacement)

                # Fresh namespace per rule so one expression cannot leak
                # names into the next.
                namespace = {**base_namespace, **scalar_values}

                # SECURITY: eval() with an emptied __builtins__ is NOT a
                # sandbox (attribute access on any object can still reach
                # arbitrary code). Only evaluate expressions from trusted
                # sources.
                result[target_key] = eval(rendered, namespace)
            except Exception:
                # Best effort: a failing expression yields None for its key.
                result[target_key] = None

        return result

    @staticmethod
    def transform_data(
        input_data: Dict[str, Any],
        mapping: Optional[Dict[str, str]] = None,
        filter_rules: Optional[List[Dict[str, Any]]] = None,
        compute_rules: Optional[Dict[str, str]] = None,
        mode: str = 'mapping'
    ) -> Dict[str, Any]:
        """
        Run one or all transformation stages.

        Stages run in the order mapping, filter, compute; each stage receives
        the previous stage's output. A stage is skipped when its rules are
        empty or not supplied.

        Args:
            input_data: Source data.
            mapping: Rules for transform_mapping.
            filter_rules: Rules for transform_filter.
            compute_rules: Rules for transform_compute.
            mode: 'mapping', 'filter', 'compute' or 'all'.

        Returns:
            The transformed data. For an unrecognised mode this is a shallow
            copy of ``input_data``.
        """
        result = input_data.copy()
        run_all = mode == 'all'

        if mapping and (run_all or mode == 'mapping'):
            result = DataTransformer.transform_mapping(result, mapping)

        if filter_rules and (run_all or mode == 'filter'):
            result = DataTransformer.transform_filter(result, filter_rules)

        if compute_rules and (run_all or mode == 'compute'):
            result = DataTransformer.transform_compute(result, compute_rules)

        return result


# Global shared instance. DataTransformer defines only static methods and no
# instance state, so this object can be reused freely.
data_transformer = DataTransformer()