Skip to content

Commit

Permalink
Feat/0.4.1 (#1059)
Browse files Browse the repository at this point in the history
  • Loading branch information
zgqgit authored Jan 3, 2025
2 parents a0cf33e + 7053f47 commit 27dda16
Show file tree
Hide file tree
Showing 213 changed files with 6,044 additions and 2,885 deletions.
6 changes: 3 additions & 3 deletions .drone.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ steps: # 定义流水线执行步骤,这些步骤将顺序执行
- REPO2=$(echo $REPO | sed 's/http:\\/\\///g')
- sed '/apt-get/ s|$| '"$PROXY"'|' Dockerfile
- sed -i 's/^bisheng_langchain.*/bisheng_langchain = "'$RELEASE_VERSION'"/g' pyproject.toml
- sed -i '16i\RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple' Dockerfile
- sed -i '38i\RUN poetry source add --priority=supplemental foo http://'$NEXUS_PUBLIC':'$NEXUS_PUBLIC_PASSWORD'@'$REPO2'simple' Dockerfile
- sed -i '38i\RUN poetry source add --priority=primary qh https://pypi.tuna.tsinghua.edu.cn/simple' Dockerfile
- sed -i '6i\RUN pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple' Dockerfile
- sed -i '7i\RUN poetry source add --priority=supplemental foo http://'$NEXUS_PUBLIC':'$NEXUS_PUBLIC_PASSWORD'@'$REPO2'simple' Dockerfile
- sed -i '8i\RUN poetry source add --priority=primary qh https://pypi.tuna.tsinghua.edu.cn/simple' Dockerfile
- cat Dockerfile

- name: build_docker
Expand Down
92 changes: 92 additions & 0 deletions .github/workflows/base_ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Builds the bisheng-backend base image from src/backend/base.Dockerfile and
# pushes it to Docker Hub whenever a "base.v*" tag is pushed, then posts a
# notification to the Feishu webhook.
name: BASE_CI

on:
  push:
    # Sequence of patterns matched against refs/tags
    tags:
      - "base.v*"

env:
  DOCKERHUB_REPO: dataelement/

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build_bisheng:
    runs-on: ubuntu-latest
    # if: startsWith(github.event.ref, 'refs/tags')
    steps:
      - name: checkout
        uses: actions/checkout@v2

      - name: Get version
        id: get_version
        # Strip the "refs/tags/" prefix so VERSION is the bare tag name.
        # Written to $GITHUB_OUTPUT because the ::set-output command is deprecated.
        run: |
          echo "VERSION=${GITHUB_REF#refs/tags/}" >> "$GITHUB_OUTPUT"

      - name: Set Environment Variable
        run: echo "RELEASE_VERSION=${{ steps.get_version.outputs.VERSION }}" >> $GITHUB_ENV

      # Log in to Docker Hub
      - name: Login to DockerHub
        uses: docker/login-action@v1
        with:
          # Add the Docker Hub credentials under GitHub Repo => Settings => Secrets.
          # DOCKERHUB_USERNAME is the Docker Hub account name.
          # DOCKERHUB_TOKEN is created under Docker Hub => Account Settings => Security.
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      # - name: Login to DockerHub
      #   uses: docker/login-action@v1
      #   with:
      #     registry: https://cr.dataelem.com/
      #     username: ${{ secrets.CR_DOCKERHUB_USERNAME }}
      #     password: ${{ secrets.CR_DOCKERHUB_TOKEN }}

      # Build the backend base image and push it to Docker Hub
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v1

      - name: set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: install poetry
        uses: snok/install-poetry@v1
        with:
          installer-parallel: true

      - name: Build backend and push
        id: docker_build_backend
        uses: docker/build-push-action@v2
        with:
          # Build context directory of the backend
          context: "./src/backend/"
          file: ./src/backend/base.Dockerfile
          # Push the image after building
          push: true
          platforms: linux/amd64,linux/arm64
          # docker build args: inject APP_NAME / APP_VERSION
          build-args: |
            APP_NAME="bisheng-backend"
            APP_VERSION=${{ steps.get_version.outputs.VERSION }}
          # Only the version tag is published; no "latest" tag is generated here
          tags: |
            ${{ env.DOCKERHUB_REPO }}bisheng-backend:${{ steps.get_version.outputs.VERSION }}

      # Flatten the head commit message onto one line for the notification
      - name: Process git message
        id: process_message
        env:
          # Passed through the environment instead of being interpolated into
          # the script, so quotes, backticks or $(...) in a commit message
          # cannot be executed as shell code.
          COMMIT_MESSAGE: ${{ github.event.head_commit.message }}
        run: |
          value=$(echo "${COMMIT_MESSAGE}" | sed -e ':a' -e 'N' -e '$!ba' -e 's/\n/%0A/g')
          value=$(echo "${value}" | sed -e ':a' -e 'N' -e '$!ba' -e 's/\r/%0A/g')
          echo "message=${value}" >> $GITHUB_ENV
        shell: bash

      - name: notify feishu
        uses: fjogeleit/http-request-action@v1
        with:
          url: ${{ secrets.FEISHU_WEBHOOK }}
          method: 'POST'
          data: '{"msg_type":"post","content":{"post":{"zh_cn":{"title": "${{ steps.get_version.outputs.VERSION }}发布成功", "content": [[{"tag":"text","text":"基础镜像"},{"tag":"text","text":"${{ env.message }}"}]]}}}}'
12 changes: 3 additions & 9 deletions docker/bisheng/config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ redis_url: "redis://redis:6379/1"
# sentinel_password: encrypt(gAAAAABlp4b4c59FeVGF_OQRVf6NOUIGdxq8246EBD-b0hdK_jVKRs1x4PoAn0A6C5S6IiFKmWn0Nm5eBUWu-7jxcqw6TiVjQA==)
# db: 1

# celery的broker地址
celery_redis_url: "redis://redis:6379/2"

# 知识库的milvus和es配置 支持使用 !env ${PATH} 填写环境变量的值, 若环境变量不存在则会报错
vector_stores:
Expand Down Expand Up @@ -60,19 +62,11 @@ logger_conf:
# 日志级别
level: INFO
# 日志格式化函数,extra内支持trace_id
format: "[{time:YYYY-MM-DD HH:mm:ss.SSSSSS}]|{level}|BISHENG|{extra[trace_id]}|{process.id}|{thread.id}|{message}"
format: '<level>[{time:YYYY-MM-DD HH:mm:ss.SSSSSS}] [{level.name} process-{process.id}-{thread.id} {name}:{line}]</level> - <level>trace={extra[trace_id]} {message}</level>'
# 每天的几点进行切割
rotation: "00:00"
retention: "3 Days"
enqueue: ture
- sink: "/app/data/err-v0-BISHENG-{HOSTNAME}.log"
level: ERROR
# 和原生不一样,后端会将配置使用eval()执行转为函数用来过滤特定日志级别。推荐lambda
filter: "lambda record: record['level'].name == 'ERROR'"
format: "[{time:YYYY-MM-DD HH:mm:ss.SSSSSS}]|{level}|BISHENG|{extra[trace_id]}||{process.id}|{thread.id}|||#EX_ERR:POS={name},line {line},ERR=500,EMSG={message}"
rotation: "00:00"
retention: "3 Days"
enqueue: ture
- sink: "/app/data/statistic.log"
level: INFO
# 和原生不一样,后端会将配置使用eval()执行转为函数用来过滤特定日志级别。推荐lambda
Expand Down
4 changes: 4 additions & 0 deletions docker/bisheng/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Container entrypoint: launch the uvicorn app in the background, then run the
# Celery worker as the foreground process.
nohup uvicorn bisheng.main:app --host 0.0.0.0 --port 7860 --no-access-log --workers 2 &

# -c sets the Celery worker concurrency
celery -A bisheng.worker.main worker -l info -c 4
4 changes: 2 additions & 2 deletions docker/docker-compose-uns.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ services:
- "10001:10001"
environment:
# 填写ocr_sdk或rt服务的根地址
server_address: bisheng-rt:9001
# server_address: bisheng-rt:9001
# 这里填 ocr_sdk 或 rt
server_type: ocr_sdk
# server_type: ocr_sdk
TZ: Asia/Shanghai
volumes:
- ${DOCKER_VOLUME_DIRECTORY:-.}/bisheng-uns/config.yaml:/opt/bisheng-unstructured/bisheng_unstructured/config/config.yaml
Expand Down
3 changes: 2 additions & 1 deletion docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,10 +72,11 @@ services:
BS_MINIO_SECRET_KEY: 'minioadmin'
volumes:
- ${DOCKER_VOLUME_DIRECTORY:-.}/bisheng/config/config.yaml:/app/bisheng/config.yaml
- ${DOCKER_VOLUME_DIRECTORY:-.}/bisheng/entrypoint.sh:/app/entrypoint.sh
- ${DOCKER_VOLUME_DIRECTORY:-.}/data/bisheng:/app/data
security_opt:
- seccomp:unconfined
command: bash -c "uvicorn bisheng.main:app --host 0.0.0.0 --port 7860 --no-access-log --workers 2" # --workers 表示使用几个进程,提高并发度
command: sh entrypoint.sh # --workers 表示使用几个进程,提高并发度
restart: on-failure
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:7860/health"]
Expand Down
38 changes: 3 additions & 35 deletions src/backend/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,42 +1,10 @@
FROM python:3.10-slim
FROM dataelement/bisheng-backend:base.v1

WORKDIR /app

RUN echo \
deb https://mirrors.aliyun.com/debian/ bookworm main non-free non-free-firmware contrib \
deb-src https://mirrors.aliyun.com/debian/ bookworm main non-free non-free-firmware contrib \
deb https://mirrors.aliyun.com/debian-security/ bookworm-security main \
deb-src https://mirrors.aliyun.com/debian-security/ bookworm-security main \
deb https://mirrors.aliyun.com/debian/ bookworm-updates main non-free non-free-firmware contrib \
deb-src https://mirrors.aliyun.com/debian/ bookworm-updates main non-free non-free-firmware contrib \
deb https://mirrors.aliyun.com/debian/ bookworm-backports main non-free non-free-firmware contrib \
deb-src https://mirrors.aliyun.com/debian/ bookworm-backports main non-free non-free-firmware contrib \
> /etc/apt/sources.list


# Install Poetry
RUN apt-get update && apt-get install gcc g++ curl build-essential postgresql-server-dev-all -y
RUN apt-get update && apt-get install procps -y
# Install font
RUN apt install vim fonts-wqy-zenhei -y
# opencv
RUN apt-get update && apt-get install -y libglib2.0-0 libsm6 libxrender1 libxext6 libgl1
RUN curl -sSL https://install.python-poetry.org | python3 - --version 1.8.2
# # Add Poetry to PATH
ENV PATH="${PATH}:/root/.local/bin"
# # Copy the pyproject.toml and poetry.lock files
# COPY poetry.lock pyproject.toml ./
# Copy the rest of the application codes
COPY ./ ./

# Install NLTK data
RUN mv -f ./nltk_data /root/nltk_data/

RUN python -m pip install --upgrade pip && \
pip install shapely==2.0.1

# Install dependencies
RUN poetry config virtualenvs.create false
RUN poetry install --no-interaction --no-ansi --without dev
RUN poetry update --without dev

CMD ["uvicorn", "bisheng.main:app", "--workers", "2", "--host", "0.0.0.0", "--port", "7860"]
# Exec form takes one array element per argument; a single "sh entrypoint.sh"
# element would be looked up as an executable with that literal name.
CMD ["sh", "entrypoint.sh"]
42 changes: 42 additions & 0 deletions src/backend/base.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Base image for bisheng-backend: system packages, Poetry, and the Python
# dependencies declared in pyproject.toml. Only pyproject.toml is copied in.
FROM python:3.10-slim

WORKDIR /app

# Point apt at the Aliyun Debian mirror.
# NOTE(review): the backslash continuations make echo emit all entries on ONE
# line, so sources.list receives a single line rather than eight — confirm
# this is intended.
RUN echo \
    deb https://mirrors.aliyun.com/debian/ bookworm main non-free non-free-firmware contrib \
    deb-src https://mirrors.aliyun.com/debian/ bookworm main non-free non-free-firmware contrib \
    deb https://mirrors.aliyun.com/debian-security/ bookworm-security main \
    deb-src https://mirrors.aliyun.com/debian-security/ bookworm-security main \
    deb https://mirrors.aliyun.com/debian/ bookworm-updates main non-free non-free-firmware contrib \
    deb-src https://mirrors.aliyun.com/debian/ bookworm-updates main non-free non-free-firmware contrib \
    deb https://mirrors.aliyun.com/debian/ bookworm-backports main non-free non-free-firmware contrib \
    deb-src https://mirrors.aliyun.com/debian/ bookworm-backports main non-free non-free-firmware contrib \
    > /etc/apt/sources.list


# Compilers, curl and the PostgreSQL server development packages
RUN apt-get update && apt-get install gcc g++ curl build-essential postgresql-server-dev-all -y
RUN apt-get update && apt-get install procps -y
# Install vim and the WenQuanYi Zen Hei font
RUN apt install vim fonts-wqy-zenhei -y
# Libraries installed for OpenCV
RUN apt-get update && apt-get install -y libglib2.0-0 libsm6 libxrender1 libxext6 libgl1
# Install Poetry 1.8.2
RUN curl -sSL https://install.python-poetry.org | python3 - --version 1.8.2
# Add Poetry to PATH
ENV PATH="${PATH}:/root/.local/bin"
# Copy only pyproject.toml; it is the input for the dependency install below
COPY ./pyproject.toml ./

RUN python -m pip install --upgrade pip && \
    pip install shapely==2.0.1

# Install dependencies without creating a virtualenv
RUN poetry config virtualenvs.create false
RUN poetry install --no-interaction --no-ansi --without dev

# Download the NLTK data sets
RUN python -c "import nltk; nltk.download('punkt'); nltk.download('punkt_tab'); nltk.download('averaged_perceptron_tagger'); nltk.download('averaged_perceptron_tagger_eng'); "

# Exec form takes one array element per argument; a single "sh entrypoint.sh"
# element would be looked up as an executable with that literal name.
CMD ["sh", "entrypoint.sh"]
17 changes: 17 additions & 0 deletions src/backend/bisheng/api/errcode/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,27 @@ class FlowOnlineEditError(BaseErrorCode):
Code: int = 10521
Msg: str = '技能已上线,不可编辑'


class WorkFlowOnlineEditError(BaseErrorCode):
    """Error 10525: the workflow is online and cannot be edited."""
    Code: int = 10525
    Msg: str = '工作流已上线,不可编辑'


class WorkFlowInitError(BaseErrorCode):
    """Error 10526: workflow initialization failed."""
    Code: int = 10526
    Msg: str = '工作流初始化失败'


class WorkFlowWaitUserTimeoutError(BaseErrorCode):
    """Error 10527: the workflow timed out waiting for user input."""
    Code: int = 10527
    Msg: str = '工作流等待用户输入超时'


class WorkFlowNodeRunMaxTimesError(BaseErrorCode):
    """Error 10528: a node was executed more than the maximum number of times."""
    Code: int = 10528
    Msg: str = '节点执行超过最大次数'


class FlowTemplateNameError(BaseErrorCode):
Code: int = 10530
Msg: str = '模板名称已存在'
3 changes: 2 additions & 1 deletion src/backend/bisheng/api/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
report_router, server_router, skillcenter_router, tag_router,
user_router, validate_router, variable_router, workflow_router)
from bisheng.api.v2 import (assistant_router_rpc, chat_router_rpc, flow_router,
knowledge_router_rpc, rpc_router_rpc)
knowledge_router_rpc, rpc_router_rpc, workflow_router_rpc)
from fastapi import APIRouter

router = APIRouter(prefix='/api/v1', )
Expand Down Expand Up @@ -37,3 +37,4 @@
router_rpc.include_router(rpc_router_rpc)
router_rpc.include_router(flow_router)
router_rpc.include_router(assistant_router_rpc)
router_rpc.include_router(workflow_router_rpc)
19 changes: 19 additions & 0 deletions src/backend/bisheng/api/services/audit_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,16 @@ def create_chat_flow(cls, user: UserPayload, ip_address: str, flow_id: str):
cls._chat_log(user, ip_address, EventType.CREATE_CHAT, ObjectType.FLOW,
flow_id, flow_info.name, ResourceTypeEnum.FLOW)

@classmethod
def create_chat_workflow(cls, user: UserPayload, ip_address: str, flow_id: str):
    """Record an audit-log entry for a newly created workflow chat session."""
    logger.info(f"act=create_chat_workflow user={user.user_name} ip={ip_address} flow={flow_id}")
    # Look the workflow up so its name can be stored with the audit record.
    workflow = FlowDao.get_flow_by_id(flow_id)
    cls._chat_log(
        user,
        ip_address,
        EventType.CREATE_CHAT,
        ObjectType.WORK_FLOW,
        flow_id,
        workflow.name,
        ResourceTypeEnum.WORK_FLOW,
    )

@classmethod
def delete_chat_flow(cls, user: UserPayload, ip_address: str, flow_info: Flow):
"""
Expand All @@ -98,6 +108,15 @@ def delete_chat_flow(cls, user: UserPayload, ip_address: str, flow_info: Flow):
cls._chat_log(user, ip_address, EventType.DELETE_CHAT, ObjectType.FLOW,
flow_info.id.hex, flow_info.name, ResourceTypeEnum.FLOW)

@classmethod
def delete_chat_workflow(cls, user: UserPayload, ip_address: str, flow_info: Flow):
    """
    Audit log for deleting a workflow chat session.
    """
    logger.info(f"act=delete_chat_workflow user={user.user_name} ip={ip_address} flow={flow_info.id}")
    cls._chat_log(user, ip_address, EventType.DELETE_CHAT, ObjectType.WORK_FLOW,
                  flow_info.id.hex, flow_info.name, ResourceTypeEnum.WORK_FLOW)

@classmethod
def delete_chat_assistant(cls, user: UserPayload, ip_address: str, assistant_info: Assistant):
"""
Expand Down
4 changes: 2 additions & 2 deletions src/backend/bisheng/api/services/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,8 @@ def update_version_info(cls, request: Request, user: UserPayload, version_id: in
if not user.access_check(flow_info.user_id, flow_info.id.hex, atype):
return UnAuthorizedError.return_resp()

# 版本是当前版本, 且技能处于上线状态则不可编辑
if version_info.is_current == 1 and flow_info.status == FlowStatus.ONLINE.value:
# 版本是当前版本, 且技能处于上线状态则不可编辑data数据,名称和描述可以编辑
if version_info.is_current == 1 and flow_info.status == FlowStatus.ONLINE.value and flow_version.data:
if flow_info.flow_type == FlowType.WORKFLOW.value:
return WorkFlowOnlineEditError.return_resp()
else:
Expand Down
7 changes: 5 additions & 2 deletions src/backend/bisheng/api/services/knowledge_imp.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,10 @@ def parse_partitions(partitions: List[Any]) -> Dict:
text = part['text']
for index, bbox in enumerate(bboxes):
key = f'{pages[index]}-' + '-'.join([str(int(one)) for one in bbox])
val = text[indexes[index][0]:indexes[index][1] + 1]
if index == len(bboxes) -1:
val = text[indexes[index][0]:]
else:
val = text[indexes[index][0]:indexes[index][1] + 1]
res[key] = {'text': val, 'type': part['type'], 'part_id': part_index}
return res

Expand All @@ -414,7 +417,7 @@ def read_chunk_text(input_file, file_name, separator: List[str], separator_rule:
llm = decide_knowledge_llm()
except Exception as e:
logger.exception('knowledge_llm_error:')
raise Exception(f'知识库总结所需模型配置有误,初始化失败, {str(e)}')
raise Exception(f'文档知识库总结模型已失效,请前往模型管理-系统模型设置中进行配置。{str(e)}')
text_splitter = ElemCharacterTextSplitter(separators=separator,
separator_rule=separator_rule,
chunk_size=chunk_size,
Expand Down
Loading

0 comments on commit 27dda16

Please sign in to comment.