Annotation management (#1767)

Co-authored-by: jyong <jyong@dify.ai>
This commit is contained in:
Jyong
2023-12-18 13:10:05 +08:00
committed by GitHub
parent a9b942981d
commit a71f2863ac
41 changed files with 1871 additions and 67 deletions

View File

@@ -0,0 +1,426 @@
import datetime
import json
import uuid
import pandas as pd
from flask_login import current_user
from sqlalchemy import or_
from werkzeug.datastructures import FileStorage
from werkzeug.exceptions import NotFound
from extensions.ext_database import db
from extensions.ext_redis import redis_client
from models.model import MessageAnnotation, Message, App, AppAnnotationHitHistory, AppAnnotationSetting
from tasks.annotation.add_annotation_to_index_task import add_annotation_to_index_task
from tasks.annotation.enable_annotation_reply_task import enable_annotation_reply_task
from tasks.annotation.disable_annotation_reply_task import disable_annotation_reply_task
from tasks.annotation.update_annotation_to_index_task import update_annotation_to_index_task
from tasks.annotation.delete_annotation_index_task import delete_annotation_index_task
from tasks.annotation.batch_import_annotations_task import batch_import_annotations_task
class AppAnnotationService:
    """Service layer for an app's message annotations.

    Provides create / list / export / update / delete / bulk-import
    operations on ``MessageAnnotation`` rows, records annotation hits, and
    exposes the annotation-reply setting of an app. Vector-index maintenance
    is never done inline; it is dispatched to the imported Celery-style tasks
    via ``.delay(...)``.

    Most public methods resolve the app through the current user's tenant
    and raise ``werkzeug.exceptions.NotFound`` when it cannot be found.
    """

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @classmethod
    def _get_app_or_404(cls, app_id: str) -> App:
        """Return the app with ``status == 'normal'`` owned by the current tenant.

        Raises:
            NotFound: no matching app exists for the current user's tenant.
        """
        app = db.session.query(App).filter(
            App.id == app_id,
            App.tenant_id == current_user.current_tenant_id,
            App.status == 'normal'
        ).first()

        if not app:
            raise NotFound("App not found")
        return app

    @classmethod
    def _get_annotation_or_404(cls, app_id: str, annotation_id: str) -> MessageAnnotation:
        """Return the annotation ``annotation_id`` belonging to ``app_id``.

        The lookup is scoped to the app on purpose: matching on the id alone
        would let a caller reach annotations of apps they do not own.

        Raises:
            NotFound: the annotation does not exist within this app.
        """
        annotation = db.session.query(MessageAnnotation).filter(
            MessageAnnotation.id == annotation_id,
            MessageAnnotation.app_id == app_id
        ).first()

        if not annotation:
            raise NotFound("Annotation not found")
        return annotation

    @classmethod
    def _get_annotation_setting(cls, app_id: str):
        """Return the app's ``AppAnnotationSetting`` row, or ``None`` if absent.

        The presence of this row is what the service treats as
        "annotation reply is enabled".
        """
        return db.session.query(AppAnnotationSetting).filter(
            AppAnnotationSetting.app_id == app_id
        ).first()

    @staticmethod
    def _format_annotation_setting(annotation_setting) -> dict:
        """Serialize an existing annotation setting into the API response dict."""
        collection_binding_detail = annotation_setting.collection_binding_detail
        return {
            "id": annotation_setting.id,
            "enabled": True,
            "score_threshold": annotation_setting.score_threshold,
            "embedding_model": {
                "embedding_provider_name": collection_binding_detail.provider_name,
                "embedding_model_name": collection_binding_detail.model_name
            }
        }

    # ------------------------------------------------------------------
    # annotation CRUD
    # ------------------------------------------------------------------
    @classmethod
    def up_insert_app_annotation_from_message(cls, args: dict, app_id: str) -> MessageAnnotation:
        """Create or update an annotation, optionally bound to a message.

        When ``args['message_id']`` is truthy the message must belong to the
        app; its existing annotation is updated, or a new one is created and
        linked to the message. Without a message id a standalone annotation
        is created.

        Args:
            args: must contain ``'answer'`` and ``'question'``; may contain
                ``'message_id'``.
            app_id: id of the app the annotation belongs to.

        Returns:
            The persisted ``MessageAnnotation``.

        Raises:
            NotFound: the app or the referenced message does not exist.
        """
        app = cls._get_app_or_404(app_id)

        if args.get('message_id'):
            message_id = str(args['message_id'])
            message = db.session.query(Message).filter(
                Message.id == message_id,
                Message.app_id == app.id
            ).first()

            if not message:
                raise NotFound("Message Not Exists.")

            annotation = message.annotation
            if annotation:
                # the message is already annotated: overwrite in place
                annotation.content = args['answer']
                annotation.question = args['question']
            else:
                annotation = MessageAnnotation(
                    app_id=app.id,
                    conversation_id=message.conversation_id,
                    message_id=message.id,
                    content=args['answer'],
                    question=args['question'],
                    account_id=current_user.id
                )
        else:
            annotation = MessageAnnotation(
                app_id=app.id,
                content=args['answer'],
                question=args['question'],
                account_id=current_user.id
            )
        db.session.add(annotation)
        db.session.commit()

        # if annotation reply is enabled, add the annotation to the index
        annotation_setting = cls._get_annotation_setting(app_id)
        if annotation_setting:
            add_annotation_to_index_task.delay(annotation.id, args['question'], current_user.current_tenant_id,
                                               app_id, annotation_setting.collection_binding_id)
        return annotation

    @classmethod
    def enable_app_annotation(cls, args: dict, app_id: str) -> dict:
        """Start the asynchronous job that enables annotation reply for an app.

        If redis already holds a value under ``enable_app_annotation_<app_id>``
        that value is returned as the job id with status ``'processing'``
        (the key is not written here; presumably the task sets it - verify
        against ``enable_annotation_reply_task``).

        Args:
            args: must contain ``'score_threshold'``,
                ``'embedding_provider_name'`` and ``'embedding_model_name'``.
            app_id: id of the app.

        Returns:
            ``{'job_id': ..., 'job_status': 'processing' | 'waiting'}``.
        """
        enable_app_annotation_key = 'enable_app_annotation_{}'.format(str(app_id))
        cache_result = redis_client.get(enable_app_annotation_key)
        if cache_result is not None:
            return {
                'job_id': cache_result,
                'job_status': 'processing'
            }

        # async job
        job_id = str(uuid.uuid4())
        enable_app_annotation_job_key = 'enable_app_annotation_job_{}'.format(str(job_id))
        # mark the job as waiting, then dispatch the enable task
        redis_client.setnx(enable_app_annotation_job_key, 'waiting')
        enable_annotation_reply_task.delay(str(job_id), app_id, current_user.id, current_user.current_tenant_id,
                                           args['score_threshold'],
                                           args['embedding_provider_name'], args['embedding_model_name'])
        return {
            'job_id': job_id,
            'job_status': 'waiting'
        }

    @classmethod
    def disable_app_annotation(cls, app_id: str) -> dict:
        """Start the asynchronous job that disables annotation reply for an app.

        Mirrors :meth:`enable_app_annotation`: a value already stored under
        ``disable_app_annotation_<app_id>`` is returned as an in-progress job.

        Returns:
            ``{'job_id': ..., 'job_status': 'processing' | 'waiting'}``.
        """
        disable_app_annotation_key = 'disable_app_annotation_{}'.format(str(app_id))
        cache_result = redis_client.get(disable_app_annotation_key)
        if cache_result is not None:
            return {
                'job_id': cache_result,
                'job_status': 'processing'
            }

        # async job
        job_id = str(uuid.uuid4())
        disable_app_annotation_job_key = 'disable_app_annotation_job_{}'.format(str(job_id))
        # mark the job as waiting, then dispatch the disable task
        redis_client.setnx(disable_app_annotation_job_key, 'waiting')
        disable_annotation_reply_task.delay(str(job_id), app_id, current_user.current_tenant_id)
        return {
            'job_id': job_id,
            'job_status': 'waiting'
        }

    @classmethod
    def get_annotation_list_by_app_id(cls, app_id: str, page: int, limit: int, keyword: str):
        """Return one page of an app's annotations, newest first.

        Args:
            app_id: id of the app.
            page: 1-based page number passed to ``paginate``.
            limit: page size (``paginate`` is called with ``max_per_page=100``).
            keyword: when truthy, only annotations whose question or content
                contains it (case-insensitive ``ILIKE``) are returned.

        Returns:
            ``(items, total)`` from the pagination object.

        Raises:
            NotFound: the app does not exist for the current tenant.
        """
        cls._get_app_or_404(app_id)

        query = db.session.query(MessageAnnotation).filter(MessageAnnotation.app_id == app_id)
        if keyword:
            pattern = '%{}%'.format(keyword)
            query = query.filter(
                or_(
                    MessageAnnotation.question.ilike(pattern),
                    MessageAnnotation.content.ilike(pattern)
                )
            )
        annotations = (query
                       .order_by(MessageAnnotation.created_at.desc())
                       .paginate(page=page, per_page=limit, max_per_page=100, error_out=False))
        return annotations.items, annotations.total

    @classmethod
    def export_annotation_list_by_app_id(cls, app_id: str):
        """Return every annotation of the app, newest first.

        Raises:
            NotFound: the app does not exist for the current tenant.
        """
        cls._get_app_or_404(app_id)

        return (db.session.query(MessageAnnotation)
                .filter(MessageAnnotation.app_id == app_id)
                .order_by(MessageAnnotation.created_at.desc())
                .all())

    @classmethod
    def insert_app_annotation_directly(cls, args: dict, app_id: str) -> MessageAnnotation:
        """Create a standalone annotation (not linked to any message).

        Args:
            args: must contain ``'answer'`` and ``'question'``.
            app_id: id of the app.

        Returns:
            The persisted ``MessageAnnotation``.

        Raises:
            NotFound: the app does not exist for the current tenant.
        """
        app = cls._get_app_or_404(app_id)

        annotation = MessageAnnotation(
            app_id=app.id,
            content=args['answer'],
            question=args['question'],
            account_id=current_user.id
        )
        db.session.add(annotation)
        db.session.commit()

        # if annotation reply is enabled, add the annotation to the index
        annotation_setting = cls._get_annotation_setting(app_id)
        if annotation_setting:
            add_annotation_to_index_task.delay(annotation.id, args['question'], current_user.current_tenant_id,
                                               app_id, annotation_setting.collection_binding_id)
        return annotation

    @classmethod
    def update_app_annotation_directly(cls, args: dict, app_id: str, annotation_id: str):
        """Overwrite the question and answer of an existing annotation.

        Args:
            args: must contain ``'answer'`` and ``'question'``.
            app_id: id of the app that owns the annotation.
            annotation_id: id of the annotation to update.

        Returns:
            The updated ``MessageAnnotation``.

        Raises:
            NotFound: the app, or the annotation within that app, is missing.
        """
        cls._get_app_or_404(app_id)
        annotation = cls._get_annotation_or_404(app_id, annotation_id)

        annotation.content = args['answer']
        annotation.question = args['question']
        db.session.commit()

        # if annotation reply is enabled, refresh the annotation in the index
        app_annotation_setting = cls._get_annotation_setting(app_id)
        if app_annotation_setting:
            update_annotation_to_index_task.delay(annotation.id, annotation.question,
                                                  current_user.current_tenant_id,
                                                  app_id, app_annotation_setting.collection_binding_id)
        return annotation

    @classmethod
    def delete_app_annotation(cls, app_id: str, annotation_id: str):
        """Delete an annotation together with its hit-history rows.

        Raises:
            NotFound: the app, or the annotation within that app, is missing.
        """
        cls._get_app_or_404(app_id)
        annotation = cls._get_annotation_or_404(app_id, annotation_id)

        # capture the id now; the ORM object must not be relied on once it
        # has been deleted and the session committed
        deleted_annotation_id = annotation.id
        db.session.delete(annotation)

        annotation_hit_histories = (db.session.query(AppAnnotationHitHistory)
                                    .filter(AppAnnotationHitHistory.annotation_id == annotation_id)
                                    .all())
        for annotation_hit_history in annotation_hit_histories:
            db.session.delete(annotation_hit_history)

        db.session.commit()

        # if annotation reply is enabled, remove the annotation from the index
        app_annotation_setting = cls._get_annotation_setting(app_id)
        if app_annotation_setting:
            delete_annotation_index_task.delay(deleted_annotation_id, app_id,
                                               current_user.current_tenant_id,
                                               app_annotation_setting.collection_binding_id)

    @classmethod
    def batch_import_app_annotations(cls, app_id, file: FileStorage) -> dict:
        """Parse an uploaded CSV and queue an asynchronous bulk import.

        The first CSV row is consumed by pandas as the header; in every
        following row the first column is the question and the second the
        answer.

        Args:
            app_id: id of the app.
            file: uploaded CSV file.

        Returns:
            ``{'job_id': ..., 'job_status': 'waiting'}`` on success, or
            ``{'error_msg': ...}`` when parsing or dispatching fails.

        Raises:
            NotFound: the app does not exist for the current tenant.
        """
        cls._get_app_or_404(app_id)

        try:
            # the first row is treated as the header by read_csv
            df = pd.read_csv(file)
            # .iloc: columns are labelled by the header strings, so the
            # question/answer cells must be addressed by position
            result = [
                {
                    'question': row.iloc[0],
                    'answer': row.iloc[1]
                }
                for _, row in df.iterrows()
            ]
            if not result:
                raise ValueError("The CSV file is empty.")

            # async job
            job_id = str(uuid.uuid4())
            indexing_cache_key = 'app_annotation_batch_import_{}'.format(str(job_id))
            # mark the job as waiting, then dispatch the import task
            redis_client.setnx(indexing_cache_key, 'waiting')
            batch_import_annotations_task.delay(str(job_id), result, app_id,
                                                current_user.current_tenant_id, current_user.id)
        except Exception as e:
            # deliberate catch-all: any failure is reported to the caller as
            # an error message instead of propagating
            return {
                'error_msg': str(e)
            }
        return {
            'job_id': job_id,
            'job_status': 'waiting'
        }

    # ------------------------------------------------------------------
    # hit history
    # ------------------------------------------------------------------
    @classmethod
    def get_annotation_hit_histories(cls, app_id: str, annotation_id: str, page, limit):
        """Return one page of hit-history rows for an annotation, newest first.

        Returns:
            ``(items, total)`` from the pagination object.

        Raises:
            NotFound: the app, or the annotation within that app, is missing.
        """
        cls._get_app_or_404(app_id)
        cls._get_annotation_or_404(app_id, annotation_id)

        annotation_hit_histories = (db.session.query(AppAnnotationHitHistory)
                                    .filter(AppAnnotationHitHistory.app_id == app_id,
                                            AppAnnotationHitHistory.annotation_id == annotation_id)
                                    .order_by(AppAnnotationHitHistory.created_at.desc())
                                    .paginate(page=page, per_page=limit, max_per_page=100, error_out=False))
        return annotation_hit_histories.items, annotation_hit_histories.total

    @classmethod
    def get_annotation_by_id(cls, annotation_id: str) -> MessageAnnotation | None:
        """Return the annotation with the given id, or ``None`` if it does not exist.

        No app or tenant check is performed here.
        """
        return db.session.query(MessageAnnotation).filter(MessageAnnotation.id == annotation_id).first()

    @classmethod
    def add_annotation_history(cls, annotation_id: str, app_id: str, annotation_question: str,
                               annotation_content: str, query: str, user_id: str,
                               message_id: str, from_source: str, score: float):
        """Record that an annotation was hit and bump its hit counter.

        The counter is incremented with a SQL-side ``hit_count + 1`` update,
        then an ``AppAnnotationHitHistory`` row is inserted; both are
        committed together.
        """
        # add hit count to annotation
        db.session.query(MessageAnnotation).filter(
            MessageAnnotation.id == annotation_id
        ).update(
            {MessageAnnotation.hit_count: MessageAnnotation.hit_count + 1},
            synchronize_session=False
        )

        annotation_hit_history = AppAnnotationHitHistory(
            annotation_id=annotation_id,
            app_id=app_id,
            account_id=user_id,
            question=query,
            source=from_source,
            score=score,
            message_id=message_id,
            annotation_question=annotation_question,
            annotation_content=annotation_content
        )
        db.session.add(annotation_hit_history)
        db.session.commit()

    # ------------------------------------------------------------------
    # annotation reply setting
    # ------------------------------------------------------------------
    @classmethod
    def get_app_annotation_setting_by_app_id(cls, app_id: str):
        """Return the annotation-reply setting of an app.

        Returns:
            The serialized setting with ``"enabled": True`` when a setting row
            exists, otherwise ``{"enabled": False}``.

        Raises:
            NotFound: the app does not exist for the current tenant.
        """
        cls._get_app_or_404(app_id)

        annotation_setting = cls._get_annotation_setting(app_id)
        if annotation_setting:
            return cls._format_annotation_setting(annotation_setting)
        return {
            "enabled": False
        }

    @classmethod
    def update_app_annotation_setting(cls, app_id: str, annotation_setting_id: str, args: dict):
        """Update the score threshold of an app's annotation-reply setting.

        Args:
            app_id: id of the app.
            annotation_setting_id: id of the setting row to update.
            args: must contain ``'score_threshold'``.

        Returns:
            The serialized, updated setting.

        Raises:
            NotFound: the app or the setting row is missing.
        """
        cls._get_app_or_404(app_id)

        annotation_setting = db.session.query(AppAnnotationSetting).filter(
            AppAnnotationSetting.app_id == app_id,
            AppAnnotationSetting.id == annotation_setting_id,
        ).first()
        if not annotation_setting:
            raise NotFound("App annotation not found")

        annotation_setting.score_threshold = args['score_threshold']
        annotation_setting.updated_user_id = current_user.id
        # naive UTC timestamp, same value utcnow() produced, without the
        # deprecated call
        annotation_setting.updated_at = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
        db.session.add(annotation_setting)
        db.session.commit()

        return cls._format_annotation_setting(annotation_setting)

View File

@@ -138,7 +138,22 @@ class AppModelConfigService:
config["retriever_resource"]["enabled"] = False
if not isinstance(config["retriever_resource"]["enabled"], bool):
raise ValueError("enabled in speech_to_text must be of boolean type")
raise ValueError("enabled in retriever_resource must be of boolean type")
# annotation reply
if 'annotation_reply' not in config or not config["annotation_reply"]:
config["annotation_reply"] = {
"enabled": False
}
if not isinstance(config["annotation_reply"], dict):
raise ValueError("annotation_reply must be of dict type")
if "enabled" not in config["annotation_reply"] or not config["annotation_reply"]["enabled"]:
config["annotation_reply"]["enabled"] = False
if not isinstance(config["annotation_reply"]["enabled"], bool):
raise ValueError("enabled in annotation_reply must be of boolean type")
# more_like_this
if 'more_like_this' not in config or not config["more_like_this"]:
@@ -325,6 +340,7 @@ class AppModelConfigService:
"suggested_questions_after_answer": config["suggested_questions_after_answer"],
"speech_to_text": config["speech_to_text"],
"retriever_resource": config["retriever_resource"],
"annotation_reply": config["annotation_reply"],
"more_like_this": config["more_like_this"],
"sensitive_word_avoidance": config["sensitive_word_avoidance"],
"external_data_tools": config["external_data_tools"],

View File

@@ -165,7 +165,8 @@ class CompletionService:
'streaming': streaming,
'is_model_config_override': is_model_config_override,
'retriever_from': args['retriever_from'] if 'retriever_from' in args else 'dev',
'auto_generate_name': auto_generate_name
'auto_generate_name': auto_generate_name,
'from_source': from_source
})
generate_worker_thread.start()
@@ -193,7 +194,7 @@ class CompletionService:
query: str, inputs: dict, files: List[PromptMessageFile],
detached_user: Union[Account, EndUser],
detached_conversation: Optional[Conversation], streaming: bool, is_model_config_override: bool,
retriever_from: str = 'dev', auto_generate_name: bool = True):
retriever_from: str = 'dev', auto_generate_name: bool = True, from_source: str = 'console'):
with flask_app.app_context():
# fixed the state of the model object when it detached from the original session
user = db.session.merge(detached_user)
@@ -218,7 +219,8 @@ class CompletionService:
streaming=streaming,
is_override=is_model_config_override,
retriever_from=retriever_from,
auto_generate_name=auto_generate_name
auto_generate_name=auto_generate_name,
from_source=from_source
)
except (ConversationTaskInterruptException, ConversationTaskStoppedException):
pass
@@ -385,6 +387,9 @@ class CompletionService:
result = json.loads(result)
if result.get('error'):
cls.handle_error(result)
if result['event'] == 'annotation' and 'data' in result:
message_result['annotation'] = result.get('data')
return cls.get_blocking_annotation_message_response_data(message_result)
if result['event'] == 'message' and 'data' in result:
message_result['message'] = result.get('data')
if result['event'] == 'message_end' and 'data' in result:
@@ -427,6 +432,9 @@ class CompletionService:
elif event == 'agent_thought':
yield "data: " + json.dumps(
cls.get_agent_thought_response_data(result.get('data'))) + "\n\n"
elif event == 'annotation':
yield "data: " + json.dumps(
cls.get_annotation_response_data(result.get('data'))) + "\n\n"
elif event == 'message_end':
yield "data: " + json.dumps(
cls.get_message_end_data(result.get('data'))) + "\n\n"
@@ -499,6 +507,25 @@ class CompletionService:
return response_data
@classmethod
def get_blocking_annotation_message_response_data(cls, data: dict):
    """Build the blocking-mode response for an annotation reply.

    Args:
        data: accumulated result dict; the annotation event payload is read
            from ``data['annotation']``.

    Returns:
        A dict with ``event`` set to ``'annotation'``, the task/message ids,
        the annotation text as ``answer``, an empty ``metadata`` dict, the
        current Unix time as ``created_at`` and the annotation id/author.
        ``conversation_id`` is added only when the payload's ``mode`` is
        ``'chat'``.
    """
    annotation_event = data.get('annotation')

    payload = dict(
        event='annotation',
        task_id=annotation_event.get('task_id'),
        id=annotation_event.get('message_id'),
        answer=annotation_event.get('text'),
        metadata={},
        created_at=int(time.time()),
        annotation_id=annotation_event.get('annotation_id'),
        annotation_author_name=annotation_event.get('annotation_author_name'),
    )

    # only chat-mode replies carry a conversation
    in_chat_mode = annotation_event.get('mode') == 'chat'
    if in_chat_mode:
        payload['conversation_id'] = annotation_event.get('conversation_id')

    return payload
@classmethod
def get_message_end_data(cls, data: dict):
response_data = {
@@ -551,6 +578,23 @@ class CompletionService:
return response_data
@classmethod
def get_annotation_response_data(cls, data: dict):
    """Build the streaming response chunk for an ``annotation`` event.

    Args:
        data: the annotation event payload.

    Returns:
        A dict with ``event`` set to ``'annotation'``, the task/message ids,
        the annotation text as ``answer``, the current Unix time as
        ``created_at`` and the annotation id/author. ``conversation_id`` is
        added only when the payload's ``mode`` is ``'chat'``.
    """
    chunk = dict(
        event='annotation',
        task_id=data.get('task_id'),
        id=data.get('message_id'),
        answer=data.get('text'),
        created_at=int(time.time()),
        annotation_id=data.get('annotation_id'),
        annotation_author_name=data.get('annotation_author_name'),
    )

    # only chat-mode replies carry a conversation
    in_chat_mode = data.get('mode') == 'chat'
    if in_chat_mode:
        chunk['conversation_id'] = data.get('conversation_id')

    return chunk
@classmethod
def handle_error(cls, result: dict):
logging.debug("error: %s", result)

View File

@@ -33,10 +33,7 @@ from tasks.clean_notion_document_task import clean_notion_document_task
from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task
from tasks.document_indexing_task import document_indexing_task
from tasks.document_indexing_update_task import document_indexing_update_task
from tasks.create_segment_to_index_task import create_segment_to_index_task
from tasks.update_segment_index_task import update_segment_index_task
from tasks.recover_document_indexing_task import recover_document_indexing_task
from tasks.update_segment_keyword_index_task import update_segment_keyword_index_task
from tasks.delete_segment_from_index_task import delete_segment_from_index_task
@@ -1175,10 +1172,12 @@ class SegmentService:
class DatasetCollectionBindingService:
@classmethod
def get_dataset_collection_binding(cls, provider_name: str, model_name: str) -> DatasetCollectionBinding:
def get_dataset_collection_binding(cls, provider_name: str, model_name: str,
collection_type: str = 'dataset') -> DatasetCollectionBinding:
dataset_collection_binding = db.session.query(DatasetCollectionBinding). \
filter(DatasetCollectionBinding.provider_name == provider_name,
DatasetCollectionBinding.model_name == model_name). \
DatasetCollectionBinding.model_name == model_name,
DatasetCollectionBinding.type == collection_type). \
order_by(DatasetCollectionBinding.created_at). \
first()
@@ -1186,8 +1185,20 @@ class DatasetCollectionBindingService:
dataset_collection_binding = DatasetCollectionBinding(
provider_name=provider_name,
model_name=model_name,
collection_name="Vector_index_" + str(uuid.uuid4()).replace("-", "_") + '_Node'
collection_name="Vector_index_" + str(uuid.uuid4()).replace("-", "_") + '_Node',
type=collection_type
)
db.session.add(dataset_collection_binding)
db.session.flush()
db.session.commit()
return dataset_collection_binding
@classmethod
def get_dataset_collection_binding_by_id_and_type(cls, collection_binding_id: str,
                                                  collection_type: str = 'dataset') -> DatasetCollectionBinding:
    """Look up a collection binding by its id and type.

    Args:
        collection_binding_id: id of the ``DatasetCollectionBinding`` row.
        collection_type: value the row's ``type`` column must match;
            defaults to ``'dataset'``.

    Returns:
        The first matching binding ordered by ``created_at``; the query's
        ``first()`` yields ``None`` when nothing matches.
    """
    binding_query = db.session.query(DatasetCollectionBinding)
    matching = binding_query.filter(
        DatasetCollectionBinding.id == collection_binding_id,
        DatasetCollectionBinding.type == collection_type,
    )
    return matching.order_by(DatasetCollectionBinding.created_at).first()