diff --git a/doc/figure/exec-flow.png b/doc/figure/exec-flow.png new file mode 100644 index 0000000..b28b084 Binary files /dev/null and b/doc/figure/exec-flow.png differ diff --git a/doc/figure/execution_flow.key b/doc/figure/execution_flow.key new file mode 100644 index 0000000..4135597 Binary files /dev/null and b/doc/figure/execution_flow.key differ diff --git a/doc/figure/iobound.png b/doc/figure/iobound.png new file mode 100644 index 0000000..b3c07a4 Binary files /dev/null and b/doc/figure/iobound.png differ diff --git "a/doc/figure/leopard - \345\211\257\346\234\254.png" "b/doc/figure/leopard - \345\211\257\346\234\254.png" new file mode 100644 index 0000000..7d9190a Binary files /dev/null and "b/doc/figure/leopard - \345\211\257\346\234\254.png" differ diff --git a/doc/figure/leopard.png b/doc/figure/leopard.png new file mode 100644 index 0000000..60f0111 Binary files /dev/null and b/doc/figure/leopard.png differ diff --git a/doc/figure/release.jpg b/doc/figure/release.jpg new file mode 100644 index 0000000..18d114f Binary files /dev/null and b/doc/figure/release.jpg differ diff --git a/doc/pard-report.fls b/doc/pard-report.fls new file mode 100644 index 0000000..206f1ab --- /dev/null +++ b/doc/pard-report.fls @@ -0,0 +1,154 @@ +PWD /Users/Jelly/Developer/pard/doc +INPUT /usr/local/texlive/2015/texmf.cnf +INPUT /usr/local/texlive/2015/texmf-dist/web2c/texmf.cnf +INPUT /usr/local/texlive/2015/texmf-var/web2c/xetex/xelatex.fmt +INPUT pard-report.tex +OUTPUT pard-report.log +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/ctex/ctexart.cls +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/ctex/ctexart.cls +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/l3kernel/expl3.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/l3kernel/expl3.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/l3kernel/expl3-code.tex +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/l3kernel/expl3-code.tex +INPUT 
/usr/local/texlive/2015/texmf-dist/tex/latex/l3kernel/l3unicode-data.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/l3kernel/l3unicode-data.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/l3kernel/l3xdvipdfmx.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/l3kernel/l3xdvipdfmx.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/l3packages/xparse/xparse.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/l3packages/xparse/xparse.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/l3packages/l3keys2e/l3keys2e.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/l3packages/l3keys2e/l3keys2e.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/etoolbox/etoolbox.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/etoolbox/etoolbox.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/generic/oberdiek/ifpdf.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/generic/oberdiek/ifpdf.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/base/fix-cm.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/base/fix-cm.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/base/ts1enc.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/base/ts1enc.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/ms/everysel.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/ms/everysel.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/ctex/config/ctexopts.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/ctex/config/ctexopts.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/base/article.cls +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/base/article.cls +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/base/size12.clo +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/base/size12.clo +INPUT /usr/local/texlive/2015/texmf-dist/fonts/map/fontname/texfonts.map +INPUT /usr/local/texlive/2015/texmf-dist/fonts/tfm/public/cm/cmr12.tfm +INPUT 
/usr/local/texlive/2015/texmf-dist/tex/latex/ctex/engine/ctex-engine-xetex.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/ctex/engine/ctex-engine-xetex.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xecjk/xeCJK.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xecjk/xeCJK.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/l3packages/xtemplate/xtemplate.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/l3packages/xtemplate/xtemplate.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/fontspec/fontspec.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/fontspec/fontspec.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/fontspec/fontspec-patches.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/fontspec/fontspec-patches.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/fontspec/fontspec-xetex.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/fontspec/fontspec-xetex.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/base/fontenc.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/base/fontenc.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/euenc/eu1enc.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/euenc/eu1enc.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/euenc/eu1lmr.fd +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/euenc/eu1lmr.fd +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xunicode/xunicode.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xunicode/xunicode.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/tipa/t3enc.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/euenc/eu1lmss.fd +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/euenc/eu1lmss.fd +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/graphics/graphicx.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/graphics/graphicx.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/graphics/keyval.sty +INPUT 
/usr/local/texlive/2015/texmf-dist/tex/latex/graphics/keyval.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/graphics/graphics.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/graphics/graphics.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/graphics/trig.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/graphics/trig.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/latexconfig/graphics.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/latexconfig/graphics.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xetex-def/xetex.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xetex-def/xetex.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/fontspec/fontspec.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/fontspec/fontspec.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xecjk/xunicode-addon.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xecjk/xunicode-addon.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xunicode/xunicode.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xunicode/xunicode.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xecjk/xunicode-extra.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xecjk/xunicode-extra.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xecjk/xeCJK.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xecjk/xeCJK.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xecjk/xeCJKfntef.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xecjk/xeCJKfntef.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/generic/ulem/ulem.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/generic/ulem/ulem.sty +INPUT /usr/local/texlive/2015/texmf-dist/fonts/tfm/public/latex-fonts/lasy6.tfm +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/environ/environ.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/environ/environ.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/trimspaces/trimspaces.sty 
+INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/trimspaces/trimspaces.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/cjk/texinput/CJKfntef.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/cjk/texinput/CJKfntef.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/cjk/texinput/CJKfntef.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/zhnumber/zhnumber.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/zhnumber/zhnumber.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/zhnumber/config/zhnumber-utf8.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/zhnumber/config/zhnumber-utf8.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/ctex/scheme/ctex-scheme-chinese-article.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/ctex/scheme/ctex-scheme-chinese-article.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/ctex/config/ctex-name-utf8.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/ctex/config/ctex-name-utf8.cfg +INPUT /dev/null +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/ctex/fontset/ctex-fontset-fandol.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/ctex/fontset/ctex-fontset-fandol.def +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/ctex/config/ctex.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/ctex/config/ctex.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/tools/enumerate.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/tools/enumerate.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/listings/listings.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/listings/listings.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/listings/lstmisc.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/listings/lstmisc.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/listings/listings.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/listings/listings.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/xcolor/xcolor.sty +INPUT 
/usr/local/texlive/2015/texmf-dist/tex/latex/xcolor/xcolor.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/latexconfig/color.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/latexconfig/color.cfg +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xecjk/xeCJK-listings.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/xelatex/xecjk/xeCJK-listings.sty +INPUT pard-report.aux +INPUT pard-report.aux +OUTPUT pard-report.aux +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/tipa/t3cmr.fd +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/tipa/t3cmr.fd +INPUT /usr/local/texlive/2015/texmf-dist/fonts/tfm/public/cm/cmmi12.tfm +INPUT /usr/local/texlive/2015/texmf-dist/fonts/tfm/public/cm/cmsy10.tfm +INPUT /usr/local/texlive/2015/texmf-dist/fonts/tfm/public/cm/cmr12.tfm +INPUT figure/leopard.png +INPUT pard-report.toc +INPUT pard-report.toc +INPUT /usr/local/texlive/2015/texmf-dist/fonts/tfm/public/cm/cmmi12.tfm +INPUT /usr/local/texlive/2015/texmf-dist/fonts/tfm/public/cm/cmmi8.tfm +INPUT /usr/local/texlive/2015/texmf-dist/fonts/tfm/public/cm/cmmi6.tfm +INPUT /usr/local/texlive/2015/texmf-dist/fonts/tfm/public/cm/cmsy10.tfm +INPUT /usr/local/texlive/2015/texmf-dist/fonts/tfm/public/cm/cmsy8.tfm +INPUT /usr/local/texlive/2015/texmf-dist/fonts/tfm/public/cm/cmsy6.tfm +INPUT /usr/local/texlive/2015/texmf-dist/fonts/tfm/public/cm/cmr8.tfm +INPUT /usr/local/texlive/2015/texmf-dist/fonts/tfm/public/cm/cmr6.tfm +OUTPUT pard-report.toc +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/euenc/eu1lmtt.fd +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/euenc/eu1lmtt.fd +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/listings/lstlang1.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/listings/lstlang1.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/listings/lstlang1.sty +INPUT /usr/local/texlive/2015/texmf-dist/tex/latex/listings/lstlang1.sty +INPUT figure/release.png +INPUT figure/architecture.png +INPUT figure/pard-node-in.png +INPUT 
figure/timeline.png +INPUT figure/execution_flow.pdf +INPUT pard-report.aux diff --git a/doc/pard-report.tex b/doc/pard-report.tex new file mode 100644 index 0000000..cf0a095 --- /dev/null +++ b/doc/pard-report.tex @@ -0,0 +1,498 @@ +\documentclass[a4paper, 12pt]{ctexart} + +\usepackage{enumerate} +\usepackage{graphicx} +\usepackage{listings} +\usepackage{xcolor} +%\usepackage{fancyhdr} + + +% code listings +\lstdefinestyle{customc}{ + belowcaptionskip=1\baselineskip, + breaklines=true, + frame=L, + xleftmargin=\parindent, + language=C, + showstringspaces=false, + basicstyle=\footnotesize\ttfamily, + keywordstyle=\bfseries\color{green!40!black}, + commentstyle=\itshape\color{purple!40!black}, + identifierstyle=\color{blue}, + stringstyle=\color{orange}, +} + +\lstset{basicstyle=\ttfamily\color{brown}, + showstringspaces=false, + commentstyle=\color{red}, + keywordstyle=\color{blue} +} + +%%%%%%%%%%%% author %%%%%%%%%%%%%%%% +\author{金国栋\\ + \and + 韩涵\\ + \and + 黄文韬\\ + \and + 陈成\\ +} +\title{分布与并行数据库Pard系统实验报告} +\date{\today} + + +%%%%%%%%%%%%%%%============== \begin{document} ====================%%%%%%%%%%%%%%%%%%% +\begin{document} + +\maketitle% +\hspace{8em} +\begin{figure}[h] + \centering + \includegraphics[width=0.7\linewidth]{figure/leopard.png} +\end{figure} + +\newpage + +\tableofcontents + + +\section{系统概述} +\subsection{任务描述} +本实验主要设计和实现一个面向分析的分布式数据库系统,即通过网络将多个不同的局部数据库系统连接起来, +使得用户可以通过分布式数据库管理系统达到透明性管理的目的,采用横向扩展的方式,扩展数据库系统的查询性能。 + +\subsection{系统需求} +\begin{enumerate} +\item 支持数据库的创建和删除。 +\item 支持表的创建和删除。 +\item 支持数据表的水平分片和垂直分片。 +\item 支持元数据的存储和管理。 +\item 支持数据从文件批量导入。 +\item 支持插入记录。 +\item 支持删除记录。 +\item 支持SQL语句\textit{select ... from ... 
where ...}。 +\end{enumerate} + +\subsection{运行环境} +Pard系统采用Java语言开发,运行时需要Java 8以上的运行环境,目前仅支持Linux系统。 +Pard采用P2P的设计思想,每个Pard Server都可以部署在集群中的任意节点上,且每个节点都可以作为主节点被客户端访问。 +在我们的实验环境中,我们利用三台普通台式机搭建了一个集群,分别为pard01、pard02和pard03。 +其中pard01和pard02分别部署了一个Pard Server,pard03部署了两个Pard Server。 + +\subsection{开发环境} +系统的开发环境主要由Git、Maven(v3.3.9+)和Java 8构成。开发的IDE包括Eclipse和Intellij IDEA。 +系统利用Maven实现了代码风格的管理和代码编译、打包、自动分发的功能,方便了项目的开发过程。 +\lstinline|mvn clean compile| +\lstinline[language=bash]|mvn clean package| + +\subsection{使用说明} +Pard系统的使用说明如下: +\begin{enumerate} +\item 代码编译和打包: + +切换到Pard项目的根目录,执行 +\lstinline[language=bash]|mvn clean package| + +\textit{pard-assembly/target}目录下的\textit{.zip}或\textit{.tar.gz}文件即为部署文件。 + +解压后目录结构如图\ref{fig:release} + +\item Pard Server的启动: +\lstinline[language=bash]|./sbin/pard-server start/run| +Pard Server的启动方式分为两种: +\begin{itemize} + \item start: 后台启动。Pard Server进程将利用nohup后台启动。 + \item run: 前台启动。 +\end{itemize} +对于后台启动的Pard Server,可以调用 +\lstinline[language=bash]|./sbin/pard-server stop| +停止进程。 + +\item 配置文件: +\begin{itemize} +\item \textit{pard.name}: Pard Server的名称,需要保证每个Server都不相同。 +\item \textit{pard.host}: Pard Server的网络IP地址。 +\item \textit{pard.server.port}: Pard Server的服务端口号。客户端连接该端口。 +\item \textit{pard.web.port}: Pard Server的网络端口号。 +\item \textit{pard.rpc.port}: Pard Server的RPC端口。供Server之间调用。 +\item \textit{pard.exchange.port}: Pard Server数据传输的端口。 +\item \textit{pard.file.port}: Pard Server文件传输的端口。 +\item \textit{pard.connector.host}: Pard Server连接的数据库的地址。 +\item \textit{pard.connector.user}: Pard Server连接的数据库的用户名。 +\item \textit{pard.connector.password}: Pard Server连接的数据库对应用户的密码。 +\item \textit{pard.connector.driver}: Pard Server使用的JDBC连接的driver类。 +\end{itemize} +\end{enumerate} + + +\begin{figure}[htbp] + \centering + \includegraphics[scale=0.9]{figure/release.jpg} + \caption{发布版目录结构} + \label{fig:release} +\end{figure} + + +\subsection{项目管理} +Pard的开发利用Github进行协同,并且代码都开源在Github上。\textit{https://github.com/dbiir/pard} + +\section{系统架构} 
+Pard整体架构如图\ref{fig:archi}。用户通过命令行连接集群中的任意一个Pard Node进行交互,每个Pard Node都可以通过内置的connector连接多个SlaveDB。 +通过connector的方式屏蔽底层SlaveDB的细节,这样SlaveDB可以为PostgreSQL或者MySQL等任意数据库,只需按照connector的接口开发对应的connector即可。 + +\begin{figure}[htbp] + \centering + \includegraphics[width=\linewidth]{figure/architecture.png} + \caption{Pard整体架构图} + \label{fig:archi} +\end{figure} + +Pard单个节点的架构如图\ref{fig:node1}。每个节点都有可能成为与用户直接交互的节点,应用层面诸如Client、Web UI +是用户可见的抽象层级。下面来看底层实现。Pard收到用户输入的SQL语句,转化为执行的Job,先交给SQL Parser进行SQL的语法解析,得到抽象语法树AST。 +再依据查询本身特性和数据划分的特点,在SQL Optimizer模块中进行优化。然后由Job Planner制定物理的查询执行计划,并将计划交给Job Scheduler,生成具体的查询执行任务, +分发给各个节点去执行,并协调任务之间执行的顺序、同步异步等。 + +各个节点还有存储管理模块,负责管理数据在内存中的组织形式。数据在内存中以\textit{Block}的方式组织。 +节点中的通讯模块具体分两类:一类是任务的通信,使用RPC技术发送较小数据量的任务通知; +另一类是SQL执行需要的大批量数据的通信,比如节点之间的数据shuffle,我们使用Netty做大批量数据的异步传输。 +元数据由各节点的Catalog模块维护,数据分布在集群中的各个节点,由分布式的KV存储系统etcd负责存储和数据的同步,etcd遵循的raft协议可以确保各个节点元数据的一致性。 +节点中的Executor是本地执行器,负责执行接收到的具体的查询任务,并调用Connector与本地连接的数据库进行交互。 +NodeKeeper模块负责集群中节点状态的维护,每个Pard Server启动的时候都需要向NodeKeeper注册,并在进程结束的时候通知NodeKeeper。 + +\begin{figure}[htbp] + \centering + \includegraphics[width=0.7\linewidth]{figure/pard-node-in.png} + \caption{Pard单个节点的架构} + \label{fig:node1} +\end{figure} + +\subsection{PardServer启动/关闭流程} +Pard Server的启动流程: +\begin{enumerate} +\item 初始化配置。读取配置文件,并且检查配置项。 +\item 加载connector。加载配置的connector,包括初始化数据库JDBC连接的连接池。 +\item 加载Catalog。初始化Catalog与etcd的连接,并启动etcd的watch线程。 +\item 加载本地执行器(Executor)。初始化本地执行器。 +\item 加载NodeKeeper。初始化本地的NodeKeeper模块。 +\item 启动数据传输的ExchangeServer。以线程方式初始化和启动Netty。不阻塞主进程。 +\item 启动文件传输的FileExchangeServer。以线程方式初始化和启动Netty。不阻塞主进程。 +\item 启动RPCServer。以线程方式初始化和启动RPC服务。不阻塞主进程。 +\item 加载JobScheduler。初始化JobScheduler,并创建单例。 +\item 加载TaskScheduler。初始化TaskScheduler,并创建单例。 +\item Pard Server注册。向NodeKeeper注册一个Pard Server,包括名称、IP地址、RPC端口、数据传输端口、文件传输端口等。该信息会在etcd中进行同步,方便其他节点查询。 +\item 启动Pard Web Server。以线程方式启动内嵌的Jetty作为web server,目前用于展示查询计划。 +\item 启动socket监听。服务器和客户端之间采用socket进行通信,启动服务器端的socket监听线程。 +\item 
注册shutdownHook。注册JVM进程停止的shutdownHook,该hook中添加的方法将会按照顺序在JVM停止之前执行,进行清理和资源释放。 +\end{enumerate} + +Pard Server的停止流程: +\begin{enumerate} +\item 停止web server。 +\item 停止socket连接和监听。 +\item 通知NodeKeeper更改Pard Server的状态为下线(或从NodeKeeper中删除该节点)。 +\item 停止Catalog。中止etcd的watch线程,并释放与etcd的连接。 +\item 停止本地执行器(Executor)。停止正在执行的task。 +\item 停止数据传输的ExchangeServer。关闭Netty线程。 +\item 停止文件传输的FileExchangeServer。关闭Netty线程。 +\item 停止RPCServer。关闭RPC线程。 +\item 关闭Connector。关闭与数据库的连接池。 +\end{enumerate} + +\subsection{时间安排} +如图\ref{fig:tl}。 + +\begin{figure}[htbp] + \centering + \includegraphics[width=\linewidth]{figure/timeline.png} + \caption{时间安排timeline} + \label{fig:tl} +\end{figure} + +\section{各模块详细设计} +\subsection{执行流程} + +总体执行流程如图\ref{fig:exec-flow}。 + +\begin{figure}[htbp] + \centering + \includegraphics[width=\linewidth]{figure/exec-flow.png} + \caption{总体执行流程} + \label{fig:exec-flow} +\end{figure} + + +package cn.edu.ruc.iir.pard.scheduler; + + +在PardServer中,每个客户端连接由一个单独的线程负责,通过socket方式连接。 +用户输入的SQL语句会提交到\textit{JobScheduler}中,创建一个新的\textit{Job}。JobScheduler会维护和更新该Job的状态信息。具体流程如下: +\begin{itemize} +\item SQL语句先由\textit{Pard Parser}进行语义解析,得到一个抽象语法树(AST),同时Job的状态更新为\textit{PARSED}。 +\item 抽象语法树由\textit{Pard Planner}制定逻辑查询计划,同时Job的状态更新为\textit{PLANNING}。 +\item 生成的逻辑查询计划由\textit{Pard Optimizer}负责进行优化,同时Job的状态更新为\textit{SCHEDULING}。 +\item 优化后的查询计划交给\textit{Task Scheduler}生成执行的任务,并进行任务的调度、分发和监控,各节点负责接收任务并执行。 +同时Job的状态更新为\textit{EXECUTING}。 +\item 任务执行完毕以后,Job的最终状态为三种,\textit{DONE}、\textit{ABORTED}和\textit{FAILED}。这三种状态分别表示执行顺利完成、执行被放弃和执行失败。 +\end{itemize} + +在查询执行过程中,每次任务执行时都需要通过NodeKeeper同步当前集群的节点状态, +如果有节点状态为非在线,则该查询转为\textit{ABORTED}状态,并刷出日志提醒用户。 + +在JobScheduler中记录了该Server所有正在执行的job的情况,并且已执行完的Job根据三种状态,分别维护了一个列表。 +这些信息可以提供给web端进行状态显示。 + +\subsubsection{CREATE/DROP/INSERT 执行流程} +\textit{CREATE/DROP/INSERT} 生成执行任务后,任务将通过RPC的方式发送给对应节点,并返回执行结果。 + +\subsubsection{LOAD 执行流程} +\textit{LOAD} 生成执行任务后,共有两类task。 +一类在本地读取文件,并按照数据划分的规则将文件内容划分后存储在内存中; 
+另一类将内存中划分后的数据发送到对应节点,并调用节点的Executor执行本地数据库的LOAD操作。 +最后收集各节点的执行状态,返回最终状态给用户。 + +\subsubsection{SELECT 执行流程} +单表的\textit{SELECT} 生成执行任务后,分发给对应节点执行。任务中包含查询执行的操作树,节点的Executor调用本地的Connector。 +本地Connector根据查询树生成对应的SQL语句,如适应于PostgreSQL语法规则的SQL语句。 +Connector执行的结果以Block的形式流式地返回给Executor,即Executor每次读取一个Block,而不是一次读取整个结果集, +这种方式有效地节省了内存的使用,防止数据量大时内存不够。 + +\subsubsection{JOIN 执行流程} + + +\subsubsection{错误信息} +查询执行过程可能产生各种错误,对此,系统设计了一套错误信息。 +% todo error message list + +\subsection{节点通信} +通讯任务依据数据传输量的大小,和任务本身性质,可以自然的分为两类: +一类是描述节点要负责的任务本身的通知,数据量很小,我们选用RPC技术来实现; +另一类是因数据partition产生的大批量数据传输,数据量通常很大,我们选用Netty框架来实现。 +Insert、Delete、Load等返回值比较少,不需要太多交互通信的也都是用RPC来做的, +其余大批量通信比较多的涉及到Netty。 +Pard有对Byte array做可选压缩的功能。 + + +\begin{figure}[htbp] + \centering + \includegraphics[width=0.7\linewidth]{figure/iobound.png} + \caption{Pard inbound outbound} + \label{fig:iobound} +\end{figure} + +Pard中Netty 的事件可以分为 Inbound 和 Outbound 事件。 +从图\ref{fig:iobound}可以看出, inbound 事件和 outbound 事件的流向是不一样的, inbound 事件的流向是从下至上, 而 outbound 刚好相反, 是从上到下。并且 inbound 的传递方式是通过调用相应的 \lstinline|ChannelHandlerContext.fireIN_EVT()| 方法, 而 outbound 事件的传递方式是通过调用 \lstinline|ChannelHandlerContext.OUT_EVT()| 方法。 例如 \lstinline|ChannelHandlerContext.fireChannelRegistered()| 调用会发送一个 \textit{ChannelRegistered} 的 inbound 给下一个ChannelHandlerContext, 而 \lstinline|ChannelHandlerContext.bind| 调用会发送一个 \textit{bind} 的 outbound 事件给 下一个 ChannelHandlerContext。 +\lstinline|PardFileExchangeClient| 的 \lstinline|run| 方法的pipeline就加入了新的 +\lstinline|ExchangeFileSendHandler|( \lstinline|extends ChannelInboundHandlerAdapter| )的实例。 + +网络传输是只能传输byte array的,拿到后要做语义解析。我们开始自己做了一个Object Encoder和Decoder, +还有使用gzip算法做数据压缩的可选功能。 +Netty自己也实现了解析byte array的这类需求,叫编解码技术。 +服务器编码数据后发送到客户端,客户端需要对数据进行解码。编解码器由两部分组成:编码器、解码器。 +解码器:负责将消息从字节或其他序列形式转成指定的消息对象; +编码器:将消息对象转成字节或其他序列形式在网络上传输。 +我们的编解码器都是ChannelHandler的实现。 +入站“ByteBuf”读取bytes后由 ToIntegerDecoder 进行解码,然后将解码后的消息存入List集合中,然后传递到ChannelPipeline中的下一个ChannelInboundHandler。 + + + + + +\subsection{元数据} 
+Pard使用etcd,以Key-Value形式存储GDD。etcd 的官方定义是: +\begin{quote} + A highly-available key value store for shared configuration and service discovery. +\end{quote} +实际上,etcd作为一个受到Zookeeper与doozer启发而催生的项目,除了拥有与之类似的功能外,更具有以下4个特点: +\begin{enumerate} + \item 简单:基于HTTP+JSON的API让你用curl命令就可以轻松使用; + \item 安全:可选SSL客户认证机制; + \item 快速:每个实例每秒支持一千次写操作; + \item 可信:使用Raft算法充分实现了分布式。 +\end{enumerate} +所以我们选用etcd存储GDD。 +Pard的一个亮点是基于etcd的watch机制,减少了IO的操作。 +只有当GDD发生改变时,才去取数据,更新。 +我们使用etcd的持久监听(stream),当有事件时,会连续触发,不需要客户端重新发起监听。 +因为GDD本来就很小,所以我们在内存中维护GDD。 +Pard watch的有site、schema、user,每个站点开启三个Thread来watch。 +调用 \lstinline| watcher.listen()| 方法持续的watch。 + + +元数据模型关键结构如下: +\begin{lstlisting} +public class GDD { +HashMapsiteList; +HashMapschemaList; +HashMapuserList; +} + +public class Site { +int Id; +String name; +String ip; +int port; +boolean isLeader; +} + +public class Schema { +String name; +int id; +ListtableList; +ListuserList; +//Statics statics; +} +public class Privilege { +int use;//1,read, 3, write,5create 7,delete; +int uid; +String username; +} +public class Table { +String tablename; +int id; +HashMapcolumns; +Listfragment; +ListuserList; +int isFragment; +Statics statics; +} + +public class Column { +int id; +int dataType; +String columnName; +int len; +int index;//0:none; 1:hashindex; 2:btreeindex; 3:others +boolean isKey; +} +public class Fragment { +int fragmentType;//0: horizontal;1:vertical +Listcondition; +Table subTable; +int siteId; +Statics statics; +} +public class Condition { +String columnName; +int CompareType;//define less,great,equal.. 
+String value; +int dataType;//the datatype of value +} +public class Statics { +String columnName; +String min; +String max; +String mean; +String mode; +String median; +HashMapstaticList; +} +public class User { +int uid; +String username; +HashMaptableList; +HashMapschemaList; +} +\end{lstlisting} + + + + + + +\subsection{SQL解析} +语义语法解析使用Antlrv4。 +ANTLR—Another Tool for Language Recognition,可以根据输入的SQL命令和我们自定义的规则, +根据自动生成语法树, + +Pard支持的语法如下: +\begin{lstlisting} +statement +: query #statementDefault +| USE schema=identifier #use +| CREATE SCHEMA (IF NOT EXISTS)? qualifiedName #createSchema +| DROP SCHEMA (IF EXISTS)? qualifiedName (CASCADE | RESTRICT)? #dropSchema +| ALTER SCHEMA qualifiedName RENAME TO identifier #renameSchema +| CREATE TABLE (IF NOT EXISTS)? qualifiedName +tableElementPart +(',' tableElementPart)* +partitionOps? #createTable +| CREATE INDEX indexName=identifier ON +indexTbl=qualifiedName '(' identifier (',' identifier)*')' #createIndex +| DROP INDEX indexName=identifier #dropIndex +| DROP TABLE (IF EXISTS)? qualifiedName #dropTable +| INSERT INTO qualifiedName columnAliases? query #insertInto +| DELETE FROM qualifiedName (WHERE booleanExpression)? #delete +| ALTER TABLE from=qualifiedName RENAME TO to=qualifiedName #renameTable +| ALTER TABLE tableName=qualifiedName +RENAME COLUMN from=identifier TO to=identifier #renameColumn +| ALTER TABLE tableName=qualifiedName +DROP COLUMN column=qualifiedName #dropColumn +| ALTER TABLE tableName=qualifiedName +ADD COLUMN column=columnDefinition #addColumn +| GRANT +(privilege (',' privilege)* | ALL PRIVILEGES) +ON TABLE? qualifiedName TO grantee=identifier +(WITH GRANT OPTION)? #grant +| REVOKE +(GRANT OPTION FOR)? +(privilege (',' privilege)* | ALL PRIVILEGES) +ON TABLE? qualifiedName FROM grantee=identifier #revoke +| SHOW GRANTS +(ON TABLE? qualifiedName)? #showGrants +| EXPLAIN ANALYZE? VERBOSE? +('(' explainOption (',' explainOption)* ')')? 
statement #explain +| SHOW STATS (FOR | ON) qualifiedName #showStats +| SHOW STATS FOR '(' querySpecification ')' #showStatsForQuery +| DESCRIBE qualifiedName #showColumns +| DESC qualifiedName #showColumns +| START TRANSACTION (transactionMode (',' transactionMode)*)? #startTransaction +| COMMIT WORK? #commit +| ROLLBACK WORK? #rollback +| SHOW PARTITIONS (FROM | IN) qualifiedName #showPartitions +| SHOW SCHEMAS #showSchemas +| SHOW TABLES (FROM schemaName=identifier)? #showTables +| LOAD path=identifier INTO table=qualifiedName #load +; +\end{lstlisting} + +目前并没有实现全部的完整SQL功能,只 +支持基本的Select语句,也就是验收要求的部分,如 +\lstinline|select * from ASG where eno<’E1020’ and eno>=’E0990’| + + + + + +\subsection{SQL优化} +% PLAN树结构 +% PROJECT/FILTER下推 +% 表达式计算 +% INSERT/DELETE/SELECT剪枝 +% JOIN优化 + +\section{任务分工及小结} + +\subsection{金国栋的小结} + +\subsection{韩涵的小结} + +\subsection{黄文韬的小结} + +\subsection{陈成的小结} +本门课程中,学习了分布式数据库的一些非常基础的知识,相比于另一门数据库管理系统实现,这门课就多在了分布式的部分, +数据如何partition,不同的partition方式下查询等操作如何优化。大作业确实非常challenging,比如Netty框架设计复杂精妙,我学习花了很大时间,一个个example的改、试,后来在师兄的帮助下才正确的运用在了项目中。 +前几次课与卢卫老师的大数据管理冲突,我都没来,后来觉得还是应该去一下明确本学期在课程上的投入。 +课程消耗精力太多的话,没时间做自己该负责的科研项目了。 +我认为,我们没有在预期时间完成任务的原因是当初系统设计的导向不是任务导向,当初可能要求太高了,各种复杂的结构、情况都考虑的话, +过早优化是万恶之源, +可能基本的任务都没完成就得不偿失了。 +通过这次大作业感受到搭建一个能够work的系统并不是一件容易的事 +情,总是会有莫名其妙的 bug 困扰着你并消磨你的热情;另一个体会是,永远不要把工作往 +后推,因为一到期末 deadline 很多,你并不会像自己预期那样高效,连续的熬夜也不能迅速 +打造完美的系统。 + + + + + +\end{document} \ No newline at end of file diff --git a/pard-assembly/pom.xml b/pard-assembly/pom.xml index a00243d..1a5db53 100644 --- a/pard-assembly/pom.xml +++ b/pard-assembly/pom.xml @@ -17,24 +17,24 @@ - - - - - - - - - - - - - - - - - - + + org.apache.maven.plugins + maven-assembly-plugin + + + src/main/assembly/pard-final.xml + + + + + make-assembly + package + + single + + + + \ No newline at end of file diff --git a/pard-catalog/src/main/java/cn/edu/ruc/iir/pard/catalog/Column.java b/pard-catalog/src/main/java/cn/edu/ruc/iir/pard/catalog/Column.java index 6f0ab7c..0ee0568 100644 --- 
a/pard-catalog/src/main/java/cn/edu/ruc/iir/pard/catalog/Column.java +++ b/pard-catalog/src/main/java/cn/edu/ruc/iir/pard/catalog/Column.java @@ -9,6 +9,7 @@ public class Column private int id; private int dataType; private String columnName; + private String tableName; private int len; private int index; //0:none; 1:hashindex; 2:btreeindex; 3:others private int key; @@ -17,7 +18,7 @@ public Column() { } - public Column(int id, int dataType, String columnName, int len, int index, int key) + public Column(int id, int dataType, String columnName, int len, int index, int key, String tableName) { this.id = id; this.dataType = dataType; @@ -25,8 +26,18 @@ public Column(int id, int dataType, String columnName, int len, int index, int k this.len = len; this.index = index; this.key = key; + this.tableName = tableName; + } + public Column(Column column) + { + this.id = column.id; + this.dataType = column.dataType; + this.columnName = column.columnName; + this.len = column.len; + this.index = column.index; + this.key = column.key; + this.tableName = column.tableName; } - public int getId() { return id; @@ -86,4 +97,86 @@ public void setKey(int key) { this.key = key; } + + @Override + public int hashCode() + { + final int prime = 31; + int result = 1; + result = prime * result + ((columnName == null) ? 0 : columnName.hashCode()); + result = prime * result + ((tableName == null) ? 
0 : tableName.hashCode()); + result = prime * result + dataType; + result = prime * result + id; + result = prime * result + index; + result = prime * result + key; + result = prime * result + len; + return result; + } + + @Override + public boolean equals(Object obj) + { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (getClass() != obj.getClass()) { + return false; + } + Column other = (Column) obj; + if (columnName == null) { + if (other.columnName != null) { + return false; + } + } + else if (!columnName.equals(other.columnName)) { + return false; + } + if (tableName == null) { + if (other.tableName != null) { + return false; + } + } + else if (!tableName.equals(other.tableName)) { + return false; + } + if (dataType != other.dataType) { + return false; + } + if (id != other.id) { + return false; + } + if (index != other.index) { + return false; + } + if (key != other.key) { + return false; + } + if (len != other.len) { + return false; + } + return true; + } + + public String getTableName() + { + return tableName; + } + + public void setTableName(String tableName) + { + this.tableName = tableName; + } + + public String toString() + { + if (tableName == null) { + return columnName; + } + else { + return tableName + "." 
+ columnName; + } + } } diff --git a/pard-catalog/src/main/java/cn/edu/ruc/iir/pard/etcd/EtcdUtil.java b/pard-catalog/src/main/java/cn/edu/ruc/iir/pard/etcd/EtcdUtil.java index ff303ca..930186a 100644 --- a/pard-catalog/src/main/java/cn/edu/ruc/iir/pard/etcd/EtcdUtil.java +++ b/pard-catalog/src/main/java/cn/edu/ruc/iir/pard/etcd/EtcdUtil.java @@ -277,6 +277,9 @@ private static Schema convertSchema(Schema schema) //System.out.println(json.toString()); Column col = (Column) JSONObject.toBean(json, Column.class); columnHashMap.put(obj.toString(), col); + if (col.getTableName() == null || col.getTableName().isEmpty()) { + col.setTableName(table.getTablename()); + } } table.setColumns(columnHashMap); jsonObject = JSONObject.fromObject(table.getStaticsMap()); diff --git a/pard-client/src/main/java/cn/edu/ruc/iir/pard/client/PardClient.java b/pard-client/src/main/java/cn/edu/ruc/iir/pard/client/PardClient.java index 2a45daf..b341b72 100644 --- a/pard-client/src/main/java/cn/edu/ruc/iir/pard/client/PardClient.java +++ b/pard-client/src/main/java/cn/edu/ruc/iir/pard/client/PardClient.java @@ -75,177 +75,18 @@ public void run() pretty.addRow(r); counter++; } - //System.out.println(pretty); - //pretty.printLargeDataSets(); - pretty.printLargeDataSetsOneByOne(); - System.out.println("Selected " + counter + " tuples"); - System.out.println("Execution time: " + ((double) resultSet.getExecutionTime()) / 1000 + "s"); - } - else { - System.out.println(resultSet.getStatus().toString()); - } - } - else { - System.out.println("Client receive unknown object"); - } - } - } - catch (IOException e) { - e.printStackTrace(); - break; - } - catch (ClassNotFoundException e) { - e.printStackTrace(); - } - } - } - System.out.println("Bye Pard"); - System.exit(0); - } - - /* - public PardClient() - { - this.inputStream = null; - this.outWriter = new BufferedWriter(new OutputStreamWriter(System.out)); - this.scanner = new Scanner(System.in); - } - public void testrun() - { - 
System.out.println("Welcome to Pard."); - while (true) { - System.out.print("pard>"); - String line = scanner.nextLine(); - if (line.equalsIgnoreCase("QUIT") || line.equalsIgnoreCase("EXIT")) { - break; - } - else { - try { - String[] queries = line.split(";"); - for (String q : queries) { - outWriter.write(q); - outWriter.newLine(); - outWriter.flush(); - //Object obj = inputStream.readObject(); - PardResultSet prs = new PardResultSet(PardResultSet.ResultStatus.OK); - List columns0 = new ArrayList<>(); - Column col0 = new Column(); - col0.setDataType(DataType.CHAR.getType()); - col0.setLen(20); - col0.setColumnName("name"); - Column col1 = new Column(); - col1.setDataType(DataType.INT.getType()); - col1.setColumnName("id"); - col1.setKey(1); - Column col2 = new Column(); - col2.setDataType(DataType.CHAR.getType()); - col2.setLen(50); - col2.setColumnName("alma mater"); - Column col3 = new Column(); - col3.setDataType(DataType.FLOAT.getType()); - col3.setLen(20); - col3.setColumnName("score"); - columns0.add(col0); - columns0.add(col1); - columns0.add(col2); - columns0.add(col3); - prs.setSchema(columns0); - Object obj = prs; - String temp = null; - if (obj instanceof PardResultSet) { - PardResultSet resultSet = (PardResultSet) obj; - if (resultSet.getStatus() == PardResultSet.ResultStatus.OK) { - List columns = resultSet.getSchema(); - System.out.println(resultSet.toString()); - final List colNames = new ArrayList<>(); - final List colTypes = new ArrayList<>(); - columns.forEach(c -> { - colNames.add(c.getColumnName()); - colTypes.add(c.getDataType()); - }); - Object[] header = colNames.toArray(); - String[] tableHeader = new String[header.length]; - for (int i = 0; i < tableHeader.length; i++) { - tableHeader[i] = (String) header[i]; + System.out.println(pretty); + System.err.println("Selected " + counter + " tuples"); + System.err.println("Execution time: " + ((double) resultSet.getExecutionTime()) / 1000 + "s"); + if (resultSet.getSemanticErrmsg() != null) { + 
System.err.println("Semantic Status:" + resultSet.getSemanticErrmsg()); } - PrettyTable pretty = new PrettyTable(tableHeader); - //List rows = new ArrayList(); - RowConstructor rc1 = new RowConstructor(); - rc1.appendString("TOMTOMTOM"); - rc1.appendInt(121345); - rc1.appendString("RUCRUCRUCRUCRUCRUC"); - rc1.appendFloat(78.2f); - Row row1 = rc1.build(); - temp = rc1.printRow(row1, colTypes); - String[] r1 = temp.substring(0, temp.length() - 1).split("\t"); - RowConstructor rc2 = new RowConstructor(); - rc2.appendString("TOMTOMTOMTOMTOMTOMTOMTOMTOM"); - rc2.appendInt(1213415565); - rc2.appendString("RUCRUCRUCRUCRUCRUCRUCRUCRUCRUCRUCRUC"); - rc2.appendFloat(78.248481f); - Row row2 = rc2.build(); - temp = rc2.printRow(row2, colTypes); - String[] r2 = temp.substring(0, temp.length() - 1).split("\t"); - RowConstructor rc3 = new RowConstructor(); - rc3.appendString("TOM"); - rc3.appendInt(1214); - rc3.appendString("RUC"); - rc3.appendFloat(78.248481f); - Row row3 = rc3.build(); - temp = rc3.printRow(row3, colTypes); - String[] r3 = temp.substring(0, temp.length() - 1).split("\t"); - RowConstructor rc4 = new RowConstructor(); - rc4.appendString("T"); - rc4.appendInt(121345); - rc4.appendString("R"); - rc4.appendFloat(78.2f); - Row row4 = rc4.build(); - temp = rc4.printRow(row1, colTypes); - String[] r4 = temp.substring(0, temp.length() - 1).split("\t"); - RowConstructor rc5 = new RowConstructor(); - rc5.appendString("OM"); - rc5.appendInt(1213415565); - rc5.appendString("UCR"); - rc5.appendFloat(78.248481f); - Row row5 = rc5.build(); - temp = rc5.printRow(row5, colTypes); - String[] r5 = temp.substring(0, temp.length() - 1).split("\t"); - RowConstructor rc6 = new RowConstructor(); - rc6.appendString("OMT"); - rc6.appendInt(1214); - rc6.appendString("CRU"); - rc6.appendFloat(78.248481f); - Row row6 = rc6.build(); - temp = rc6.printRow(row3, colTypes); - String[] r6 = temp.substring(0, temp.length() - 1).split("\t"); - pretty.addRow(r1); - pretty.addRow(r2); - 
pretty.addRow(r3); - pretty.addRow(r4); - pretty.addRow(r5); - pretty.addRow(r6); - - for (int i = 0; i < 20000; i++) { - RowConstructor rc7 = new RowConstructor(); - rc7.appendString("hebe"); - rc7.appendInt(7899); - rc7.appendString("irc"); - rc7.appendFloat(784.5f); - Row row7 = rc7.build(); - temp = rc7.printRow(row7, colTypes); - String[] r7 = temp.substring(0, temp.length() - 1).split("\t"); - pretty.addRow(r7); - } - long st = System.currentTimeMillis(); - //System.out.println(pretty); - //pretty.printLargeDataSets(); - pretty.printLargeDataSetsOneByOne(); - long et = System.currentTimeMillis(); - System.out.println("TIME " + (et - st)); - System.out.println(pretty.rowSize()); } else { - System.out.println(resultSet.getStatus().toString()); + System.err.println(resultSet.getStatus().toString()); + if (resultSet.getSemanticErrmsg() != null) { + System.err.println(resultSet.getSemanticErrmsg()); + } } } else { @@ -257,7 +98,7 @@ public void testrun() e.printStackTrace(); break; } - catch (Exception e) { + catch (ClassNotFoundException e) { e.printStackTrace(); } } @@ -265,21 +106,9 @@ public void testrun() System.out.println("Bye Pard"); System.exit(0); } - private static void testPrettyTable() - { - PrettyTable table = new PrettyTable("Firstname", "Lastname", "Email", "Phone"); - table.addRow("John", "Doe", "johndoe@nothing.com", "+2137999999"); - table.addRow("Jane", "Doe", "janedoe@nothin.com", "+2137999999"); - System.out.println(table); - PardClient pc = new PardClient(); - pc.testrun(); - } - */ public static void main(String[] args) { - //testPrettyTable(); - if (args.length != 2) { System.out.println("PardClient "); System.exit(-1); diff --git a/pard-client/src/main/java/cn/edu/ruc/iir/pard/client/PrettyTable.java b/pard-client/src/main/java/cn/edu/ruc/iir/pard/client/PrettyTable.java index 3d22c83..6a38943 100644 --- a/pard-client/src/main/java/cn/edu/ruc/iir/pard/client/PrettyTable.java +++ 
b/pard-client/src/main/java/cn/edu/ruc/iir/pard/client/PrettyTable.java @@ -10,6 +10,7 @@ public class PrettyTable { private List headers = new ArrayList<>(); private List> data = new ArrayList<>(); + private List colLength = new ArrayList<>(); public PrettyTable(String... headers) { @@ -21,7 +22,7 @@ public void addRow(String... row) data.add(Arrays.asList(row)); } - private int getMaxSize(int column) + private int getMaxSizeForCol(int column) { int maxSize = headers.get(column).length(); for (List row : data) { @@ -32,24 +33,27 @@ private int getMaxSize(int column) return maxSize; } - private String formatRow(List row) + private String formatRule() { StringBuilder result = new StringBuilder(); - result.append("|"); - for (int i = 0; i < row.size(); i++) { - result.append(StringUtils.center(row.get(i), getMaxSize(i) + 2)); - result.append("|"); + result.append("+"); + for (int i = 0; i < headers.size(); i++) { + for (int j = 0; j < colLength.get(i).intValue() + 2; j++) { + result.append("-"); + } + result.append("+"); } result.append("\n"); return result.toString(); } - private String formatRule() + private String formatRuleNew() { StringBuilder result = new StringBuilder(); result.append("+"); for (int i = 0; i < headers.size(); i++) { - for (int j = 0; j < getMaxSize(i) + 2; j++) { + colLength.add(new Integer(getMaxSizeForCol(i))); + for (int j = 0; j < colLength.get(i).intValue() + 2; j++) { result.append("-"); } result.append("+"); @@ -58,63 +62,33 @@ private String formatRule() return result.toString(); } - public String toString() + private String formatRowNew(List row) { StringBuilder result = new StringBuilder(); - result.append(formatRule()); - result.append(formatRow(headers)); - result.append(formatRule()); - for (List row : data) { - result.append(formatRow(row)); + result.append("|"); + for (int i = 0; i < row.size(); i++) { + result.append(StringUtils.center(row.get(i), colLength.get(i).intValue() + 2)); + result.append("|"); } - 
result.append(formatRule()); - return result.toString(); - } - - public int rowSize() - { - return data.size(); - } - - public String printHeader() - { - StringBuilder result = new StringBuilder(); - result.append(formatRule()); - result.append(formatRow(headers)); - result.append(formatRule()); + result.append("\n"); return result.toString(); } - public String printEnd() + public String toString() { StringBuilder result = new StringBuilder(); + result.append(formatRuleNew()); + result.append(formatRowNew(headers)); result.append(formatRule()); - return result.toString(); - } - - public void printLargeDataSets() - { - System.out.println(printHeader()); - StringBuilder result = new StringBuilder(); - int count = 0; for (List row : data) { - result.append(formatRow(row)); - count++; - if (count == 5000) { - System.out.println(result.toString()); - result.delete(0, result.length()); - count = 0; - } + result.append(formatRowNew(row)); } - System.out.println(printEnd()); + result.append(formatRule()); + return result.toString(); } - public void printLargeDataSetsOneByOne() + public int rowSize() { - System.out.println(printHeader()); - for (List row : data) { - System.out.println(formatRow(row)); - } - System.out.println(printEnd()); + return data.size(); } } diff --git a/pard-client/src/test/java/cn/edu/ruc/iir/pard/client/TestPardClient.java b/pard-client/src/test/java/cn/edu/ruc/iir/pard/client/TestPardClient.java index a370f77..af1e0d5 100644 --- a/pard-client/src/test/java/cn/edu/ruc/iir/pard/client/TestPardClient.java +++ b/pard-client/src/test/java/cn/edu/ruc/iir/pard/client/TestPardClient.java @@ -32,4 +32,13 @@ private void testPrettyTable() System.out.println(RowConstructor.printRow(rowConstructor.build(), ImmutableList.of(DataType.VARCHAR.getType(), DataType.INT.getType(), DataType.DOUBLE.getType()))); } + + @Test + public void testClient() + { + String host = "10.77.40.31"; + int port = 11013; + String[] args = {host, port + ""}; + PardClient.main(args); + 
} } diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ErrorMessage.java b/pard-commons/src/main/java/cn/edu/ruc/iir/pard/commons/exception/ErrorMessage.java similarity index 86% rename from pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ErrorMessage.java rename to pard-commons/src/main/java/cn/edu/ruc/iir/pard/commons/exception/ErrorMessage.java index f498dd4..70db5fa 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ErrorMessage.java +++ b/pard-commons/src/main/java/cn/edu/ruc/iir/pard/commons/exception/ErrorMessage.java @@ -1,4 +1,4 @@ -package cn.edu.ruc.iir.pard.planner; +package cn.edu.ruc.iir.pard.commons.exception; import java.util.HashMap; import java.util.Map; @@ -7,6 +7,7 @@ public class ErrorMessage { private int errcode; private String errmsg; + private transient PardException exception; private static Map template = null; private static ErrorMessage ok = throwMessage(ErrCode.OK, ""); public static ErrorMessage throwMessage(int errorCode, Object...objects) @@ -22,6 +23,7 @@ public static ErrorMessage getOKMessage() } public static class ErrCode { + public static final int SomeSiteDown = 2; public static final int OK = 1; public static final int ParseError = -10000; public static final int SchemaExsits = -10001; @@ -48,6 +50,8 @@ public static class ErrCode public static final int UnSupportedQuery = -10022; public static final int FileNotFound = -10023; public static final int FileIOError = -10024; + public static final int AllSiteDown = -10025; + public static final int ColumnNameIsAmbiguous = -10026; } public static void init() { @@ -78,7 +82,10 @@ public static void init() template.put(ErrCode.UnSupportedQuery, "Unsupported query:%s"); template.put(ErrCode.FileNotFound, "File not exists"); template.put(ErrCode.FileIOError, "File IO error"); + template.put(ErrCode.AllSiteDown, "Congratulations! The table %s site is all lost connection. 
Pard needn't do a query plan, thank you!"); + template.put(ErrCode.ColumnNameIsAmbiguous, "Column name %s is ambigouous in %s"); template.put(ErrCode.OK, "success"); + template.put(ErrCode.SomeSiteDown, "Pard lost some sites' connections, the information you want may be not complete."); } static { @@ -92,14 +99,24 @@ public Map getTemplate() return template; } public ErrorMessage() - {} + { + try { + exception = new PardException(); + throw exception; + } + catch (PardException e) { + exception = e; + } + } public ErrorMessage(String errMsg, int errcode) { + this(); this.errcode = errcode; this.errmsg = errMsg; } public ErrorMessage(int errorCode, Object...objects) { + this(); String tmp = getTemplate().get(errorCode); this.errcode = errorCode; this.errmsg = String.format(tmp, objects); @@ -130,4 +147,9 @@ public String toString() { return String.format("ERROR:[%d] %s", errcode, errmsg); } + // get set + public PardException getException() + { + return exception; + } } diff --git a/pard-commons/src/main/java/cn/edu/ruc/iir/pard/commons/exception/PardException.java b/pard-commons/src/main/java/cn/edu/ruc/iir/pard/commons/exception/PardException.java index 813474d..8644263 100644 --- a/pard-commons/src/main/java/cn/edu/ruc/iir/pard/commons/exception/PardException.java +++ b/pard-commons/src/main/java/cn/edu/ruc/iir/pard/commons/exception/PardException.java @@ -5,6 +5,38 @@ * * @author guodong */ -public abstract class PardException +public class PardException + extends RuntimeException { + /** + * + */ + private static final long serialVersionUID = -8125939273116913981L; + private final ErrorMessage pardErrorMessage; + + public PardException() + { + pardErrorMessage = null; + } + + public PardException(int errorCode, Object... 
objs) + { + pardErrorMessage = ErrorMessage.throwMessage(errorCode, objs); + if (pardErrorMessage.getException() != null) { + this.initCause(pardErrorMessage.getException()); + } + } + + public PardException(ErrorMessage msg) + { + pardErrorMessage = msg; + if (pardErrorMessage.getException() != null) { + this.initCause(pardErrorMessage.getException()); + } + } + + public ErrorMessage getPardErrorMessage() + { + return pardErrorMessage; + } } diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/semantic/SemanticException.java b/pard-commons/src/main/java/cn/edu/ruc/iir/pard/commons/exception/SemanticException.java similarity index 59% rename from pard-main/src/main/java/cn/edu/ruc/iir/pard/semantic/SemanticException.java rename to pard-commons/src/main/java/cn/edu/ruc/iir/pard/commons/exception/SemanticException.java index 826364d..b6fbe42 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/semantic/SemanticException.java +++ b/pard-commons/src/main/java/cn/edu/ruc/iir/pard/commons/exception/SemanticException.java @@ -1,6 +1,4 @@ -package cn.edu.ruc.iir.pard.semantic; - -import cn.edu.ruc.iir.pard.planner.ErrorMessage; +package cn.edu.ruc.iir.pard.commons.exception; /** * SemanticException @@ -9,7 +7,7 @@ * @author hagen * */ public class SemanticException - extends RuntimeException + extends PardException { /** * @@ -17,13 +15,24 @@ public class SemanticException private static final long serialVersionUID = -8125939273116913981L; private final ErrorMessage semanticErrorMessage; + public SemanticException() + { + semanticErrorMessage = null; + } + public SemanticException(int errorCode, Object...objs) { semanticErrorMessage = ErrorMessage.throwMessage(errorCode, objs); + if (semanticErrorMessage.getException() != null) { + this.initCause(semanticErrorMessage.getException()); + } } public SemanticException(ErrorMessage msg) { semanticErrorMessage = msg; + if (semanticErrorMessage.getException() != null) { + 
this.initCause(semanticErrorMessage.getException()); + } } public ErrorMessage getSemanticErrorMessage() { diff --git a/pard-commons/src/main/java/cn/edu/ruc/iir/pard/commons/exception/TaskSchedulerException.java b/pard-commons/src/main/java/cn/edu/ruc/iir/pard/commons/exception/TaskSchedulerException.java new file mode 100644 index 0000000..1689217 --- /dev/null +++ b/pard-commons/src/main/java/cn/edu/ruc/iir/pard/commons/exception/TaskSchedulerException.java @@ -0,0 +1,25 @@ +package cn.edu.ruc.iir.pard.commons.exception; + +public class TaskSchedulerException + extends PardException +{ + /** + * + */ + private static final long serialVersionUID = 1L; + + public TaskSchedulerException() + { + super(); + } + + public TaskSchedulerException(ErrorMessage msg) + { + super(msg); + } + + public TaskSchedulerException(int errorCode, Object... objs) + { + super(errorCode, objs); + } +} diff --git a/pard-connector-postgresql/pom.xml b/pard-connector-postgresql/pom.xml index 8d8dc17..c5945a1 100644 --- a/pard-connector-postgresql/pom.xml +++ b/pard-connector-postgresql/pom.xml @@ -24,7 +24,10 @@ cn.edu.ruc.iir pard-catalog - + + cn.edu.ruc.iir + pard-optimizer + com.zaxxer HikariCP diff --git a/pard-connector-postgresql/src/main/java/cn/edu/ruc/iir/pard/connector/postgresql/PostgresConnector.java b/pard-connector-postgresql/src/main/java/cn/edu/ruc/iir/pard/connector/postgresql/PostgresConnector.java index 8254ea6..0f14f7f 100644 --- a/pard-connector-postgresql/src/main/java/cn/edu/ruc/iir/pard/connector/postgresql/PostgresConnector.java +++ b/pard-connector-postgresql/src/main/java/cn/edu/ruc/iir/pard/connector/postgresql/PostgresConnector.java @@ -2,39 +2,60 @@ import cn.edu.ruc.iir.pard.catalog.Column; import cn.edu.ruc.iir.pard.commons.config.PardUserConfiguration; +import cn.edu.ruc.iir.pard.commons.memory.Row; import cn.edu.ruc.iir.pard.commons.utils.DataType; import cn.edu.ruc.iir.pard.commons.utils.RowConstructor; import 
cn.edu.ruc.iir.pard.executor.connector.Connector; import cn.edu.ruc.iir.pard.executor.connector.CreateSchemaTask; import cn.edu.ruc.iir.pard.executor.connector.CreateTableTask; +import cn.edu.ruc.iir.pard.executor.connector.CreateTmpTableTask; import cn.edu.ruc.iir.pard.executor.connector.DeleteTask; import cn.edu.ruc.iir.pard.executor.connector.DropSchemaTask; import cn.edu.ruc.iir.pard.executor.connector.DropTableTask; import cn.edu.ruc.iir.pard.executor.connector.InsertIntoTask; +import cn.edu.ruc.iir.pard.executor.connector.JoinTask; import cn.edu.ruc.iir.pard.executor.connector.LoadTask; import cn.edu.ruc.iir.pard.executor.connector.PardResultSet; import cn.edu.ruc.iir.pard.executor.connector.QueryTask; +import cn.edu.ruc.iir.pard.executor.connector.SendDataTask; import cn.edu.ruc.iir.pard.executor.connector.Task; import cn.edu.ruc.iir.pard.executor.connector.node.FilterNode; +import cn.edu.ruc.iir.pard.executor.connector.node.JoinNode; import cn.edu.ruc.iir.pard.executor.connector.node.LimitNode; import cn.edu.ruc.iir.pard.executor.connector.node.PlanNode; import cn.edu.ruc.iir.pard.executor.connector.node.ProjectNode; import cn.edu.ruc.iir.pard.executor.connector.node.SortNode; import cn.edu.ruc.iir.pard.executor.connector.node.TableScanNode; +import cn.edu.ruc.iir.pard.sql.expr.ColumnItem; +import cn.edu.ruc.iir.pard.sql.expr.Expr; +import cn.edu.ruc.iir.pard.sql.expr.FalseExpr; +import cn.edu.ruc.iir.pard.sql.expr.TrueExpr; +import cn.edu.ruc.iir.pard.sql.expr.ValueItem; +import cn.edu.ruc.iir.pard.sql.tree.Expression; +import com.google.common.collect.ImmutableList; import org.postgresql.copy.CopyManager; import org.postgresql.jdbc.PgConnection; +import java.io.BufferedReader; +import java.io.BufferedWriter; import java.io.File; import java.io.FileInputStream; +import java.io.FileReader; +import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.sql.Connection; import java.sql.PreparedStatement; import 
java.sql.ResultSet; +import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Statement; +import java.sql.Types; import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; import java.util.List; +import java.util.Map; import java.util.logging.Logger; /** @@ -97,6 +118,18 @@ public PardResultSet execute(Task task) if (task instanceof DeleteTask) { return executeDelete(conn, (DeleteTask) task); } + if (task instanceof JoinTask) { + return executeJoin(conn, (JoinTask) task); + } + if (task instanceof SendDataTask) { + return executeSendDataTask(conn, (SendDataTask) task); + } + if (task instanceof CreateTmpTableTask) { + return executeCreateTmpTable(conn, (CreateTmpTableTask) task); + } + if (task instanceof JoinTask) { + return executeJoin(conn, (JoinTask) task); + } } catch (SQLException e) { logger.info("GET CONNECTION FAILED"); @@ -431,6 +464,9 @@ private PardResultSet executeQuery(Connection conn, QueryTask task) } logger.info("QUERY SUCCESSFULLY"); PardResultSet prs = new PardResultSet(PardResultSet.ResultStatus.OK, columns); + for (Column column : columns) { + logger.info("PROJECT Columns: " + column.getColumnName()); + } prs.setJdbcResultSet(rs); prs.setJdbcConnection(conn); return prs; @@ -511,6 +547,603 @@ private PardResultSet executeDelete(Connection conn, DeleteTask task) return PardResultSet.execErrResultSet; } + private PardResultSet executeSendDataTask(Connection conn, SendDataTask task) + { + String schema = task.getSchemaName(); + String table = null; + try { + Statement statement = conn.createStatement(); + StringBuilder querySQL = new StringBuilder("select "); + PlanNode rootNode = task.getNode(); + System.out.println(rootNode); + List nodeList = new ArrayList<>(); + int nodeListCursor = 0; + FilterNode filterNode = null; + ProjectNode projectNode = null; + SortNode sortNode = null; + LimitNode limitNode = null; + boolean isFilter = false; + boolean isProject = false; + boolean isSort = false; + 
boolean isLimit = false; + nodeList.add(rootNode); + nodeListCursor++; + while (nodeList.get(nodeListCursor - 1).hasChildren()) { + nodeList.add(nodeList.get(nodeListCursor - 1).getLeftChild()); + nodeListCursor++; + } + for (int i = nodeListCursor - 1; i >= 0; i--) { + if (nodeList.get(i) instanceof TableScanNode) { + table = ((TableScanNode) nodeList.get(i)).getTable(); + schema = ((TableScanNode) nodeList.get(i)).getSchema(); + continue; + } + + if (nodeList.get(i) instanceof FilterNode) { + filterNode = (FilterNode) nodeList.get(i); + isFilter = true; + continue; + } + + if (nodeList.get(i) instanceof ProjectNode) { + projectNode = (ProjectNode) nodeList.get(i); + isProject = true; + continue; + } + + if (nodeList.get(i) instanceof SortNode) { + sortNode = (SortNode) nodeList.get(i); + isSort = true; + continue; + } + + if (nodeList.get(i) instanceof LimitNode) { + limitNode = (LimitNode) nodeList.get(i); + isLimit = true; + } + } +// if (isProject) { + List cols = projectNode.getColumns(); + for (Column column : cols) { + querySQL.append(column.getColumnName()); + querySQL.append(","); + } + querySQL = new StringBuilder(querySQL.substring(0, querySQL.length() - 1)); +// } +// else { +// querySQL.append(" *"); +// } + querySQL.append(" from "); + querySQL.append(schema); + querySQL.append("."); + querySQL.append(table); + if (isFilter) { + querySQL.append(" where ").append(filterNode.getExpression()).append(" "); + } + if (isSort) { + querySQL.append("order by"); + List columns = sortNode.getColumns(); + for (Column column : columns) { + querySQL.append(" "); + querySQL.append(column.getColumnName()); + querySQL.append(","); + } + querySQL = new StringBuilder(querySQL.substring(0, querySQL.length() - 1)); + } + if (isLimit) { + querySQL.append(" limit "); + querySQL.append(limitNode.getLimitNum()); + } + + logger.info("Postgres connector: " + querySQL); + ResultSet rs = statement.executeQuery(querySQL.toString()); + + Map siteExpression = 
task.getSiteExpression(); // site -> Expression + Map tmpTableMap = task.getTmpTableMap(); // site -> tmpTableName + + boolean flag = dispense(siteExpression, tmpTableMap, rs, cols, schema, table); + if (flag == true) { + conn.close(); + return PardResultSet.okResultSet; + } + else { + conn.close(); + return PardResultSet.execErrResultSet; + } + } + catch (SQLException e) { + e.printStackTrace(); + } + finally { + try { + conn.close(); + } + catch (SQLException e) { + e.printStackTrace(); + } + } + return PardResultSet.execErrResultSet; + } + + public PardResultSet executeCreateTmpTable(Connection conn, CreateTmpTableTask task) + { + String schemaName = task.getSchemaName(); + String tableName = task.getTableName(); + DropTableTask dropTableTask = new DropTableTask(schemaName, tableName); + //PardResultSet prsDropTempTale = dropTempTable(conn, dropTableTask); + CreateTableTask createTableTask = new CreateTableTask(schemaName, tableName, false, task.getColumnDefinitions()); + PardResultSet prsCreateTempTable = createTempTable(conn, createTableTask); + String filePath1 = task.getPath(); + try { + BufferedReader br = new BufferedReader(new FileReader(new File(filePath1))); + BufferedWriter bw = new BufferedWriter(new FileWriter(new File(filePath1 + "tmp"))); + String readIn = br.readLine(); + readIn = br.readLine(); + while ((readIn = br.readLine()) != null) { + bw.write(readIn); + bw.newLine(); + } + br.close(); + bw.flush(); + bw.close(); + } + catch (IOException e) { + e.printStackTrace(); + } + LoadTask loadTask = new LoadTask(schemaName, tableName, ImmutableList.of(filePath1 + "tmp")); + PardResultSet prsLoadTemoTable = loadTmpTable(conn, loadTask); + return prsLoadTemoTable; + } + + private PardResultSet loadTmpTable(Connection conn, LoadTask task) + { + String schema = task.getSchema(); + String table = task.getTable(); + List paths = task.getPaths(); + try { + PgConnection pgConnection; + if (!conn.isWrapperFor(PgConnection.class)) { + return 
PardResultSet.execErrResultSet; + } + pgConnection = conn.unwrap(PgConnection.class); + CopyManager copyManager = new CopyManager(pgConnection); + for (String path : paths) { + if (schema != null) { + logger.info("Copying " + path + " into " + schema + "." + table); + String sql = "COPY " + schema + "." + table + " FROM STDIN DELIMITER E'\t'"; + logger.info("Postgres connector: " + sql); + File file = new File(path); + InputStream inputStream = new FileInputStream(file); + copyManager.copyIn(sql, inputStream); + file.deleteOnExit(); + } + else { + logger.info("Copying " + path + " into " + table); + String sql = "COPY " + table + " FROM STDIN DELIMITER E'\t'"; + logger.info("Postgres connector: " + sql); + File file = new File(path); + InputStream inputStream = new FileInputStream(file); + copyManager.copyIn(sql, inputStream); + file.deleteOnExit(); + } + } + PardResultSet resultSet = new PardResultSet(PardResultSet.ResultStatus.OK); + RowConstructor rowConstructor = new RowConstructor(); + rowConstructor.appendString(PardResultSet.ResultStatus.OK.toString()); + resultSet.add(rowConstructor.build()); + return resultSet; + } + catch (SQLException | IOException e) { + e.printStackTrace(); + } + finally { + try { + conn.close(); + } + catch (SQLException e) { + e.printStackTrace(); + } + } + return PardResultSet.execErrResultSet; + } + + private PardResultSet dropTempTable(Connection conn, DropTableTask task) + { + try { + Statement statement = conn.createStatement(); + String dropTableSQL; + if (task.getSchemaName() == null) { + dropTableSQL = "drop table if exists " + task.getTableName(); + } + else { + dropTableSQL = "drop table if exists " + task.getSchemaName() + "." 
+ task.getTableName(); + } + logger.info("Postgres connector: " + dropTableSQL); + int status = statement.executeUpdate(dropTableSQL); + if (status == 0) { + logger.info("DROP TEMP TABLE SUCCESSFULLY"); + return PardResultSet.okResultSet; + } + } + catch (SQLException e) { + logger.info("DROP TEMP TABLE FAILED"); + e.printStackTrace(); + } + return PardResultSet.execErrResultSet; + } + + private PardResultSet createTempTable(Connection conn, CreateTableTask task) + { + try { + StringBuilder createTableSQL = new StringBuilder("create table if not exists "); + if (task.getSchemaName() != null) { + createTableSQL.append(task.getSchemaName() + "." + task.getTableName() + "("); + } + else { + createTableSQL.append(task.getTableName() + "("); + } + for (Column cd : task.getColumnDefinitions()) { + if (cd.getKey() == 1) { + createTableSQL.append(cd.getColumnName()).append(" ").append(getTypeString(cd.getDataType(), cd.getLen())).append(" primary key "); + } + else { + createTableSQL.append(cd.getColumnName()).append(" ").append(getTypeString(cd.getDataType(), cd.getLen())); + } + createTableSQL.append(" ,"); + } + createTableSQL = new StringBuilder(createTableSQL.substring(0, createTableSQL.length() - 1)); + createTableSQL.append(")"); + logger.info("Connector: " + createTableSQL.toString()); + Statement statement = conn.createStatement(); + int status = statement.executeUpdate(createTableSQL.toString()); + if (status == 0) { + logger.info("CREATE TEMP TABLE SUCCESSFULLY"); + return PardResultSet.okResultSet; + } + } + catch (SQLException e) { + logger.info("CREATE TEMP TABLE FAILED"); + e.printStackTrace(); + } + return PardResultSet.execErrResultSet; + } + + private PardResultSet executeJoin(Connection conn, JoinTask task) + { + String tmpTableName = task.getTmpTableName(); + PlanNode rootNode = task.getNode(); + System.out.println(rootNode); + boolean isVertical = false; + try { + Statement statement = conn.createStatement(); + StringBuilder joinSQL = new 
StringBuilder("SELECT "); + List nodeList = new ArrayList<>(); + int nodeListCursor = 0; + boolean isProject = false; + boolean isSort = false; + boolean isLimit = false; + ProjectNode projectNode = null; + LimitNode limitNode = null; + SortNode sortNode = null; + JoinNode joinNode = null; + nodeList.add(rootNode); + nodeListCursor++; + + while (nodeList.get(nodeListCursor - 1).hasChildren()) { + nodeList.add(nodeList.get(nodeListCursor - 1).getLeftChild()); + nodeListCursor++; + if (nodeList.get(nodeListCursor - 1) instanceof JoinNode) { + joinNode = (JoinNode) nodeList.get(nodeListCursor - 1); + break; + } + } + + for (int i = nodeListCursor - 1; i >= 0; i--) { + if (nodeList.get(i) instanceof LimitNode) { + limitNode = (LimitNode) nodeList.get(i); + isLimit = true; + } + + if (nodeList.get(i) instanceof SortNode) { + sortNode = (SortNode) nodeList.get(i); + isSort = true; + } + + if (nodeList.get(i) instanceof ProjectNode) { + projectNode = (ProjectNode) nodeList.get(i); + isProject = true; + } + } + + if (isProject) { + List columns = projectNode.getColumns(); + for (Column column : columns) { + /* + if (column.getTableName() != null) { + joinSQL.append(column.getTableName()).append("."); + } + joinSQL.append(column.getColumnName()); + joinSQL.append(",");*/ + joinSQL.append(column.toString()); + joinSQL.append(","); + } + joinSQL = new StringBuilder(joinSQL.substring(0, joinSQL.length() - 1)); + } + else { + joinSQL.append(" *"); + } + + StringBuilder fromClause = new StringBuilder(" FROM "); + StringBuilder joinCondition = new StringBuilder(" WHERE "); + StringBuilder whereClause = new StringBuilder(" "); + List schemaTableName = new ArrayList<>(); + List tableNameArray = new ArrayList<>(); + List tableAliasArray = new ArrayList<>(); + List joinChildren = joinNode.getJoinChildren(); + Iterator it = joinChildren.iterator(); + Boolean isFirst = true; + while (it.hasNext()) { + PlanNode childRootNode = (PlanNode) it.next(); + List childNodeList = new 
ArrayList<>(); + int childNodeListCursor = 0; + ProjectNode childProjectNode = null; + FilterNode childFilterNode = null; + TableScanNode childTableScanNode = null; + boolean childIsProject = false; + boolean childIsFilter = false; + boolean childIsTableScan = false; + childNodeList.add(childRootNode); + childNodeListCursor++; + while (childNodeList.get(childNodeListCursor - 1).hasChildren()) { + childNodeList.add(childNodeList.get(childNodeListCursor - 1).getLeftChild()); + childNodeListCursor++; + } + for (int i = childNodeListCursor - 1; i >= 0; i--) { + if (childNodeList.get(i) instanceof ProjectNode) { + childProjectNode = (ProjectNode) childNodeList.get(i); + childIsProject = true; + } + if (childNodeList.get(i) instanceof FilterNode) { + childFilterNode = (FilterNode) childNodeList.get(i); + childIsFilter = true; + } + if (childNodeList.get(i) instanceof TableScanNode) { + childTableScanNode = (TableScanNode) childNodeList.get(i); + childIsTableScan = true; + } + } + //HERE WE IGNORE THE childProjectNode + if (childIsFilter) { + whereClause.append(" AND " + childFilterNode.getExpression()); + } + if (childIsTableScan) { + String schemaName = childTableScanNode.getSchema(); + String tableName = childTableScanNode.getTable(); + String aliasName = childTableScanNode.getAlias(); + if (isFirst) { + if (aliasName == null) { + fromClause.append(schemaName + "." + tableName); + fromClause.append(" , "); + tableAliasArray.add(tableName); + } + else { + fromClause.append(schemaName + "." + tableName + " as " + aliasName); + fromClause.append(" , "); + tableAliasArray.add(aliasName); + } + schemaTableName.add(schemaName + "." + tableName); + tableNameArray.add(tableName); + isFirst = false; + } + else { + if (aliasName == null) { + fromClause.append(schemaName + "." + tableName); + tableAliasArray.add(tableName); + } + else { + fromClause.append(schemaName + "." 
+ tableName + " as " + aliasName); + tableAliasArray.add(aliasName); + } + schemaTableName.add(schemaName + "." + tableName); + } + } + } + + if (joinNode.getExprList().size() > 0) { + joinCondition.append(joinNode.getExprList().get(0).toString()); + } + else { + String joinColumn = ((String) (joinNode.getJoinSet().iterator().next())); + for (int i = 0; i < tableAliasArray.size(); i++) { + joinCondition.append(tableAliasArray.get(i) + "." + joinColumn); + if (i != schemaTableName.size() - 1) { + joinCondition.append(" = "); + } + } + joinCondition.append(" "); + } + + if (isSort) { + whereClause.append("ORDER BY"); + List columns = sortNode.getColumns(); + for (Column column : columns) { + whereClause.append(" "); + whereClause.append(column.getTableName() + "." + column.getColumnName()); + whereClause.append(","); + } + whereClause = new StringBuilder(whereClause.substring(0, whereClause.length() - 1)); + } + if (isLimit) { + whereClause.append(" LIMIT "); + whereClause.append(limitNode.getLimitNum()); + } + joinSQL.append(fromClause.toString() + joinCondition.toString() + whereClause.toString()); + logger.info("Postgres connector: " + joinSQL); + ResultSet rs = statement.executeQuery(joinSQL.toString()); + List columns = new ArrayList<>(); + if (isProject) { + columns = projectNode.getColumns(); + } + logger.info("JOIN SUCCESSFULLY"); + PardResultSet prs = new PardResultSet(PardResultSet.ResultStatus.OK, columns); + prs.setJdbcResultSet(rs); + prs.setJdbcConnection(conn); + return prs; + } + catch (SQLException e) { + logger.info("JOIN FAILED"); + e.printStackTrace(); + } + return PardResultSet.execErrResultSet; + } + + private boolean dispense(Map siteExpression, Map tmpTableMap, ResultSet rs, List columns, String schema, String table) + { + boolean isSucceeded; + Map localWriter = new HashMap(); // site -> local BufferedWirter + for (String site : siteExpression.keySet()) { + try { + BufferedWriter bw = new BufferedWriter(new FileWriter(new File("/dev/shm/" + 
site + tmpTableMap.get(site) + "SENDDATA"))); + localWriter.put(site, bw); + bw.write(schema + "\t" + tmpTableMap.get(site) + "\t" + table + "\n"); //schema name, table name + Iterator it = columns.iterator(); + String secondLine = ""; + while (it.hasNext()) { + secondLine += ((Column) it.next()).getColumnName() + "\t"; // column names + } + secondLine = secondLine.substring(0, secondLine.length() - 1); + bw.write(secondLine); + bw.write("\n"); + bw.flush(); + } + catch (IOException e) { + e.printStackTrace(); + } + } + + try { + ResultSetMetaData rsmd = rs.getMetaData(); + int colNum = rsmd.getColumnCount(); + while (rs.next()) { + RowConstructor rowConstructor = new RowConstructor(); + List colTypes = new ArrayList<>(); + + for (int i = 0; i < colNum; i++) { + switch (rsmd.getColumnType(i + 1)) { + case Types.CHAR: + rowConstructor.appendString(rs.getString(i + 1)); + colTypes.add(DataType.CHAR.getType()); + break; + + case Types.VARCHAR: + rowConstructor.appendString(rs.getString(i + 1)); + colTypes.add(DataType.VARCHAR.getType()); + break; + + case Types.DATE: + rowConstructor.appendString(rs.getString(i + 1).toString()); + colTypes.add(DataType.DATE.getType()); + break; + + case Types.INTEGER: + rowConstructor.appendInt(rs.getInt(i + 1)); + colTypes.add(DataType.INT.getType()); + break; + + case Types.FLOAT: + rowConstructor.appendFloat(rs.getFloat(i + 1)); + colTypes.add(DataType.FLOAT.getType()); + break; + + case Types.DOUBLE: + rowConstructor.appendDouble(rs.getDouble(i + 1)); + colTypes.add(DataType.DOUBLE.getType()); + break; + + default: + break; + } + } + Row row = rowConstructor.build(); + for (Map.Entry entry : siteExpression.entrySet()) { + boolean ifTrue = compare(entry.getValue(), row, columns); + if (ifTrue) { + localWriter.get(entry.getKey()).write(rowConstructor.printRow(row, colTypes) + "\n"); + } + } + } + + for (Map.Entry entry : localWriter.entrySet()) { + entry.getValue().flush(); + entry.getValue().close(); + } + + isSucceeded = true; + 
return isSucceeded; + } + catch (SQLException e) { + e.printStackTrace(); + isSucceeded = false; + } + catch (IOException e) { + e.printStackTrace(); + isSucceeded = false; + } + return isSucceeded; + } + + private Boolean compare(Expression expr, Row row, List col) + { + List types = new ArrayList(); + col.forEach(x -> types.add(x.getDataType())); + String[] list = RowConstructor.printRow(row, types).split("\t"); + List ciList = new ArrayList(); + List vList = new ArrayList(); + Expr e = Expr.parse(expr); + for (int i = 0; i < list.length; i++) { + ColumnItem ci = new ColumnItem(col.get(i).getTableName(), col.get(i).getColumnName(), col.get(i).getDataType()); + ValueItem vi = new ValueItem(parseFromString(col.get(i).getDataType(), list[i])); + ciList.add(ci); + vList.add(vi); + } + for (int i = 0; i < list.length; i++) { + ColumnItem ci = ciList.get(i); + ValueItem vi = vList.get(i); + e = Expr.generalReplace(e, ci, vi); + } +// System.out.println(e.toString()); + e = Expr.optimize(e, Expr.LogicOperator.AND); + if (e instanceof TrueExpr) { + return true; + } + else if (e instanceof FalseExpr) { + return false; + } + return null; + } + + private static Comparable parseFromString(int dataType, String value) + { + switch(dataType) { + case DataType.DataTypeInt.SMALLINT: + case DataType.DataTypeInt.BIGINT: + case DataType.DataTypeInt.INT: + return Long.parseLong(value); + case DataType.DataTypeInt.FLOAT: + case DataType.DataTypeInt.DOUBLE: + return Double.parseDouble(value); + case DataType.DataTypeInt.TEXT: + case DataType.DataTypeInt.CHAR: + case DataType.DataTypeInt.VARCHAR: + return value; + case DataType.DataTypeInt.TIME: + case DataType.DataTypeInt.DATE: + case DataType.DataTypeInt.TIMESTAMP: + return value; + } + return value; + } + private String getTypeString(int type, int length) { if (type == DataType.INT.getType()) { diff --git a/pard-connector-postgresql/src/test/java/cn/edu/ruc/iir/pard/connector/postgresql/TestConnector.java 
b/pard-connector-postgresql/src/test/java/cn/edu/ruc/iir/pard/connector/postgresql/TestConnector.java index 25a625d..65da2cf 100644 --- a/pard-connector-postgresql/src/test/java/cn/edu/ruc/iir/pard/connector/postgresql/TestConnector.java +++ b/pard-connector-postgresql/src/test/java/cn/edu/ruc/iir/pard/connector/postgresql/TestConnector.java @@ -43,7 +43,7 @@ public class TestConnector @BeforeTest public void init() { - configuration.init("../pard-main/etc/pard.properties"); + configuration.init("../pard-main/etc/pard-wentao.properties"); } @Test @@ -199,4 +199,21 @@ public void testLoad() LoadTask loadTask = new LoadTask("pard", "emp", ImmutableList.of("/Users/Jelly/Desktop/emp.tsv")); connector.execute(loadTask); } + + @Test + public void testJoin() + { + PardUserConfiguration configuration = PardUserConfiguration.INSTANCE(); + Connector connector = PostgresConnector.INSTANCE(); + } + + @Test + public void testSendData() + { + PardUserConfiguration configuration = PardUserConfiguration.INSTANCE(); + Connector connector = PostgresConnector.INSTANCE(); + } + public void testQuery() + { + } } diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeBlockHandler.java b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeBlockHandler.java similarity index 100% rename from pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeBlockHandler.java rename to pard-connector/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeBlockHandler.java diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeFileSendHandler.java b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeFileSendHandler.java similarity index 85% rename from pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeFileSendHandler.java rename to pard-connector/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeFileSendHandler.java index d928a6b..eca57ab 100644 --- 
a/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeFileSendHandler.java +++ b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeFileSendHandler.java @@ -66,8 +66,14 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) throws IOExceptio } } - ChannelFuture f = ctx.writeAndFlush(new DefaultFileRegion(raf.getChannel(), 0, length)); - f.addListener((ChannelFutureListener) future -> ctx.writeAndFlush("OKDONE\n")); + if (path.endsWith("SENDDATA")) { // SEND DATA TASK + ChannelFuture f = ctx.writeAndFlush(new DefaultFileRegion(raf.getChannel(), 0, length)); + f.addListener((ChannelFutureListener) future -> ctx.writeAndFlush("OKDONESENDDATA\n")); + } + else { // LOAD TASK + ChannelFuture f = ctx.writeAndFlush(new DefaultFileRegion(raf.getChannel(), 0, length)); + f.addListener((ChannelFutureListener) future -> ctx.writeAndFlush("OKDONE\n")); + } } if (message.equalsIgnoreCase("OK")) { PardResultSet resultSet = new PardResultSet(PardResultSet.ResultStatus.OK); diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/PardExchangeClient.java b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/exchange/PardExchangeClient.java similarity index 100% rename from pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/PardExchangeClient.java rename to pard-connector/src/main/java/cn/edu/ruc/iir/pard/exchange/PardExchangeClient.java diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/PardFileExchangeClient.java b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/exchange/PardFileExchangeClient.java similarity index 100% rename from pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/PardFileExchangeClient.java rename to pard-connector/src/main/java/cn/edu/ruc/iir/pard/exchange/PardFileExchangeClient.java diff --git a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/CreateTmpTableTask.java b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/CreateTmpTableTask.java new file mode 100644 
index 0000000..297e5d8 --- /dev/null +++ b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/CreateTmpTableTask.java @@ -0,0 +1,49 @@ +package cn.edu.ruc.iir.pard.executor.connector; + +import cn.edu.ruc.iir.pard.catalog.Column; + +import java.util.List; + +public class CreateTmpTableTask + extends Task +{ + private static final long serialVersionUID = 5L; + private final String schemaName; + private final String tableName; + private final List columnDefinitions; + private final String path; + + public CreateTmpTableTask(String schemaName, String tableName, List columnDefinitions, String path) + { + this(schemaName, tableName, columnDefinitions, path, null); + } + + public CreateTmpTableTask(String schemaName, String tableName, List columnDefinitions, String path, String site) + { + super(site); + this.schemaName = schemaName; + this.tableName = tableName; + this.columnDefinitions = columnDefinitions; + this.path = path; + } + + public String getSchemaName() + { + return schemaName; + } + + public String getTableName() + { + return tableName; + } + + public List getColumnDefinitions() + { + return columnDefinitions; + } + + public String getPath() + { + return path; + } +} diff --git a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/JoinTask.java b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/JoinTask.java new file mode 100644 index 0000000..9664666 --- /dev/null +++ b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/JoinTask.java @@ -0,0 +1,66 @@ +package cn.edu.ruc.iir.pard.executor.connector; + +import cn.edu.ruc.iir.pard.executor.connector.node.PlanNode; + +public class JoinTask + extends Task +{ + /** + * + */ + private static final long serialVersionUID = -5881505715821305268L; + private String taskId; + private PlanNode node; + private String tmpTableName; + + public JoinTask(String site) + { + super(site); + this.tmpTableName = null; + this.node = null; + } + + public 
JoinTask(String site, PlanNode node) + { + super(site); + this.node = node; + this.tmpTableName = null; + } + + public JoinTask(String site, PlanNode node, String tmpTableName) + { + super(site); + this.node = node; + this.tmpTableName = tmpTableName; + } + + public String getTaskId() + { + return taskId; + } + + public void setTaskId(String taskId) + { + this.taskId = taskId; + } + + public PlanNode getNode() + { + return node; + } + + public void setNode(PlanNode node) + { + this.node = node; + } + + public String getTmpTableName() + { + return tmpTableName; + } + + public void setTmpTableName(String tmpTableName) + { + this.tmpTableName = tmpTableName; + } +} diff --git a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/PardResultSet.java b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/PardResultSet.java index c5a4a34..1a255f0 100644 --- a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/PardResultSet.java +++ b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/PardResultSet.java @@ -62,6 +62,7 @@ public String toString() private int currentSize = 0; private String taskId; private long executionTime; + private String semanticErrmsg; private transient ResultSet jdbcResultSet = null; private transient Connection connection = null; @@ -74,6 +75,11 @@ public PardResultSet(ResultStatus resultStatus) { this(resultStatus, ImmutableList.of(), defaultCapacity); } + public PardResultSet(ResultStatus resultStatus, String msg) + { + this(resultStatus, ImmutableList.of(), defaultCapacity); + semanticErrmsg = msg; + } public PardResultSet(ResultStatus resultStatus, List schema) { @@ -248,6 +254,16 @@ public void setExecutionTime(long executionTime) this.executionTime = executionTime; } + public String getSemanticErrmsg() + { + return semanticErrmsg; + } + + public void setSemanticErrmsg(String semanticErrmsg) + { + this.semanticErrmsg = semanticErrmsg; + } + @Override public String toString() { diff 
--git a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/SendDataTask.java b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/SendDataTask.java new file mode 100644 index 0000000..de9ee75 --- /dev/null +++ b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/SendDataTask.java @@ -0,0 +1,77 @@ +package cn.edu.ruc.iir.pard.executor.connector; + +import cn.edu.ruc.iir.pard.executor.connector.node.PlanNode; +import cn.edu.ruc.iir.pard.sql.tree.Expression; + +import java.util.HashMap; +import java.util.Map; + +public class SendDataTask + extends Task +{ + /** + * + */ + private static final long serialVersionUID = 1L; + private String taskId; + private String schemaName; + private PlanNode node; + private Map siteExpression; // site -> Expression + private Map tmpTableMap; // site -> tmpTableName + public SendDataTask(String site) + { + super(site); + siteExpression = new HashMap(); + tmpTableMap = new HashMap(); + } + + public String getTaskId() + { + return taskId; + } + + public void setTaskId(String taskId) + { + this.taskId = taskId; + } + + public String getSchemaName() + { + return schemaName; + } + + public void setSchemaName(String schemaName) + { + this.schemaName = schemaName; + } + + public PlanNode getNode() + { + return node; + } + + public void setNode(PlanNode node) + { + this.node = node; + } + + public Map getSiteExpression() + { + return siteExpression; + } + + public void setSiteExpression(Map siteExpression) + { + this.siteExpression = siteExpression; + } + + public Map getTmpTableMap() + { + return tmpTableMap; + } + + public void setTmpTableMap(Map tmpTableMap) + { + this.tmpTableMap = tmpTableMap; + } +} diff --git a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/UnionTask.java b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/UnionTask.java new file mode 100644 index 0000000..f161248 --- /dev/null +++ 
b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/UnionTask.java @@ -0,0 +1,29 @@ +package cn.edu.ruc.iir.pard.executor.connector; + +import com.google.common.collect.ImmutableList; + +import java.util.ArrayList; +import java.util.List; + +public class UnionTask + extends Task +{ + /** + * + */ + private static final long serialVersionUID = -7442789904751335533L; + private List waitTask; + public UnionTask(String site) + { + super(site); + waitTask = new ArrayList(); + } + public List getWaitTask() + { + return waitTask; + } + public void setWaitTask(List waitTask) + { + this.waitTask = ImmutableList.copyOf(waitTask); + } +} diff --git a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/CartesianNode.java b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/CartesianNode.java index be869c3..bf2a079 100644 --- a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/CartesianNode.java +++ b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/CartesianNode.java @@ -18,5 +18,6 @@ public CartesianNode(CartesianNode node) { super(node); this.name = "Cartesian"; + this.childrens.addAll(node.childrens); } } diff --git a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/FilterNode.java b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/FilterNode.java index 546007f..ccae55d 100644 --- a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/FilterNode.java +++ b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/FilterNode.java @@ -13,7 +13,7 @@ public class FilterNode extends PlanNode { private static final long serialVersionUID = 8695368351559462220L; - private final Expression expression; + private Expression expression; public FilterNode(Expression expression) { @@ -30,7 +30,10 @@ public Expression getExpression() { return expression; } - + public void setExpression(Expression 
expression) + { + this.expression = expression; + } @Override public String toString() { diff --git a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/JoinNode.java b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/JoinNode.java index 237b0dd..3892606 100644 --- a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/JoinNode.java +++ b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/JoinNode.java @@ -1,7 +1,9 @@ package cn.edu.ruc.iir.pard.executor.connector.node; +import cn.edu.ruc.iir.pard.sql.tree.ComparisonExpression; import static com.google.common.base.MoreObjects.toStringHelper; +import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -16,23 +18,34 @@ public class JoinNode { private static final long serialVersionUID = 3355047142533066940L; private Set joinSet; + private List exprList; + private String otherInfo; + public JoinNode() { name = "JOIN"; joinSet = new HashSet(); + exprList = new ArrayList(); } + public JoinNode(JoinNode node) { super(node); name = "JOIN"; joinSet = new HashSet(); joinSet.addAll(node.joinSet); + exprList = new ArrayList(); + exprList.addAll(node.getExprList()); + childrens.clear(); + childrens.addAll(node.getJoinChildren()); } + public boolean hasChildren() { return !childrens.isEmpty(); } - public void addUnionChild(PlanNode node) + + public void addJoinChild(PlanNode node) { this.childrens.add(node); } @@ -41,10 +54,12 @@ public List getJoinChildren() { return childrens; } + public Set getJoinSet() { return joinSet; } + @Override public String toString() { @@ -52,6 +67,22 @@ public String toString() .add("name", "JOIN") .add("children", childrens) .add("joinSet", this.joinSet) + .add("exprList", this.exprList) .toString(); } + + public String getOtherInfo() + { + return otherInfo; + } + + public void setOtherInfo(String otherInfo) + { + this.otherInfo = otherInfo; + } + + public List 
getExprList() + { + return exprList; + } } diff --git a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/NodeHelper.java b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/NodeHelper.java index 3573917..56519ed 100644 --- a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/NodeHelper.java +++ b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/NodeHelper.java @@ -55,7 +55,12 @@ public static String parseColumns(List list) { StringBuilder sb = new StringBuilder("["); for (int i = 0; i < list.size(); i++) { - sb.append(list.get(i).getColumnName()); + if (list.get(i).getTableName() != null) { + sb.append(list.get(i).getTableName() + "." + list.get(i).getColumnName()); + } + else { + sb.append(list.get(i).getColumnName()); + } if (i != list.size() - 1) { sb.append(","); } @@ -78,6 +83,9 @@ public static String parseLists(List list) public static List getChildren(PlanNode node) { List list = new ArrayList(); + if (node == null) { + return list; + } if (node instanceof UnionNode) { UnionNode union = (UnionNode) node; return union.getUnionChildren(); @@ -99,12 +107,16 @@ else if (node instanceof InputNode) { public static Map getPlanNodeInfo(PlanNode node) { Map obj = new HashMap(); + if (node == null) { + return obj; + } if (node instanceof TableScanNode) { TableScanNode cnode = (TableScanNode) node; obj.put("name", "TABLESCAN"); obj.put("schema", cnode.getSchema()); obj.put("table", cnode.getTable()); obj.put("site", cnode.getSite()); + obj.put("alias", cnode.getAlias()); //obj.put("child", toJSON(cnode.getLeftChild())); return obj; } @@ -135,12 +147,22 @@ else if (node instanceof LimitNode) { LimitNode cnode = (LimitNode) node; obj.put("name", "LIMIT"); obj.put("number", ((LimitNode) node).getLimitNum() + ""); + return obj; } else if (node instanceof JoinNode) { JoinNode cnode = (JoinNode) node; obj.put("name", "JOIN"); - JSONArray array = new JSONArray(); + if 
(!cnode.getJoinSet().isEmpty()) { + obj.put("joinColumn", cnode.getJoinSet().toString()); + } + if (!cnode.getExprList().isEmpty()) { + obj.put("exprList", cnode.getExprList().toString()); + } + if (cnode.getOtherInfo() != null) { + obj.put("mark", cnode.getOtherInfo()); + } // obj.put("joinSet", cnode.getJoinSet()); + return obj; } else if (node instanceof InputNode) { InputNode cnode = (InputNode) node; @@ -163,7 +185,10 @@ else if (node instanceof AggregationNode) { obj.put("name", "AggragationNode"); return obj; } - return null; + else { + System.out.println(node.getClass().getName()); + } + return obj; } public static JSONObject toJSON(PlanNode node) { diff --git a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/PlanNode.java b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/PlanNode.java index a33bd17..f4fe3db 100644 --- a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/PlanNode.java +++ b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/PlanNode.java @@ -47,7 +47,7 @@ public void setChildren(PlanNode planNode, boolean left, boolean exists) if (childrenNum >= 2) { return; } - childrenNum++; + //childrenNum++; } if (left) { children[0] = planNode; @@ -55,6 +55,7 @@ public void setChildren(PlanNode planNode, boolean left, boolean exists) else { children[1] = planNode; } + childrenNum = (children[0] == null ? 0 : 1) + (children[1] == null ? 
0 : 1); } public String getName() diff --git a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/TableScanNode.java b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/TableScanNode.java index b3125e1..6732871 100644 --- a/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/TableScanNode.java +++ b/pard-connector/src/main/java/cn/edu/ruc/iir/pard/executor/connector/node/TableScanNode.java @@ -11,9 +11,10 @@ public class TableScanNode extends InputNode { private static final long serialVersionUID = 2673717961909269975L; - private final String schema; - private final String table; + private String schema; + private String table; private String site; + private String alias; /* private Optional limit; private Optional sort; @@ -136,6 +137,15 @@ public String toString() .add("table", table) .add("site", site) .add("child", getLeftChild()) + .add("alias", alias) .toString(); } + public String getAlias() + { + return alias; + } + public void setAlias(String alias) + { + this.alias = alias; + } } diff --git a/pard-executor/src/main/java/cn/edu/ruc/iir/pard/executor/PardTaskExecutor.java b/pard-executor/src/main/java/cn/edu/ruc/iir/pard/executor/PardTaskExecutor.java index a3e4475..61e9e1f 100644 --- a/pard-executor/src/main/java/cn/edu/ruc/iir/pard/executor/PardTaskExecutor.java +++ b/pard-executor/src/main/java/cn/edu/ruc/iir/pard/executor/PardTaskExecutor.java @@ -1,15 +1,28 @@ package cn.edu.ruc.iir.pard.executor; import cn.edu.ruc.iir.pard.catalog.Column; +import cn.edu.ruc.iir.pard.catalog.Site; import cn.edu.ruc.iir.pard.commons.memory.Row; +import cn.edu.ruc.iir.pard.commons.utils.DataType; +import cn.edu.ruc.iir.pard.commons.utils.RowConstructor; +import cn.edu.ruc.iir.pard.etcd.dao.SiteDao; +import cn.edu.ruc.iir.pard.exchange.PardFileExchangeClient; import cn.edu.ruc.iir.pard.executor.connector.Block; import cn.edu.ruc.iir.pard.executor.connector.Connector; import 
cn.edu.ruc.iir.pard.executor.connector.PardResultSet; +import cn.edu.ruc.iir.pard.executor.connector.QueryTask; +import cn.edu.ruc.iir.pard.executor.connector.SendDataTask; import cn.edu.ruc.iir.pard.executor.connector.Task; +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.ConcurrentLinkedQueue; import java.util.logging.Logger; /** @@ -54,8 +67,108 @@ public PardResultSet executeStatus(Task task) public Block executeQuery(Task task) { + if (task instanceof QueryTask) { + return executeSelect(task); + } + if (task instanceof SendDataTask) { + return executeSendData(task); + } + else { // task instanceof JoinTask + return executeJoin(task); + } + } + + private Block executeSendData(Task task) + { + //Map taskMap = new HashMap<>(); + SiteDao siteDao = new SiteDao(); String taskId = task.getTaskId(); - logger.info("Executing task " + taskId); + logger.info("Executing SendDataTask " + taskId); + if (!resultSetMap.containsKey(taskId)) { + PardResultSet pardResultSet = connector.execute(task); + resultSetMap.put(taskId, pardResultSet); + sequenceIds.put(taskId, 0); + schemaMap.put(taskId, pardResultSet.getSchema()); + } + + PardResultSet resultSet = resultSetMap.get(taskId); + int seq = sequenceIds.get(taskId) + 1; + List column = new ArrayList<>(); + Column col0 = new Column(); + col0.setDataType(DataType.INT.getType()); + col0.setColumnName("id"); + col0.setTableName(((SendDataTask) task).getSchemaName()); + column.add(col0); + Block block = new Block(column, 50 * 1024 * 1024, seq, taskId); + block.setSequenceHasNext(false); + sequenceIds.put(taskId, seq); + if (resultSet.getStatus() == PardResultSet.ResultStatus.OK) { + for (Map.Entry entry : ((SendDataTask) task).getTmpTableMap().entrySet()) { + Site nodeSite = siteDao.listNodes().get(entry.getKey()); + + File file = new 
File("/dev/shm/" + entry.getKey() + + ((SendDataTask) task).getTmpTableMap().get(entry.getKey()) + + "SENDDATA"); + + if (file.exists() && fileLength(file) > 2) { + ConcurrentLinkedQueue resultSets = new ConcurrentLinkedQueue<>(); + PardFileExchangeClient pfec = new PardFileExchangeClient(nodeSite.getIp(), + nodeSite.getFileExchangePort(), + file.getPath(), + ((SendDataTask) task).getSchemaName(), + ((SendDataTask) task).getTmpTableMap().get(entry.getKey()), + task.getTaskId(), + resultSets); + pfec.run(); + //taskMap.put(task.getTaskId(), task); + } + } + return block; + } + else { + RowConstructor rowConstructor = new RowConstructor(); + rowConstructor.appendInt(0); + block.setSequenceHasNext(true); + block.addRow(rowConstructor.build()); + return block; + } + } + + private Block executeJoin(Task task) + { + String taskId = task.getTaskId(); + logger.info("Executing JoinTask " + taskId); + if (!resultSetMap.containsKey(taskId)) { + PardResultSet pardResultSet = connector.execute(task); + resultSetMap.put(taskId, pardResultSet); + sequenceIds.put(taskId, 0); + schemaMap.put(taskId, pardResultSet.getSchema()); + } + PardResultSet resultSet = resultSetMap.get(taskId); + int seq = sequenceIds.get(taskId) + 1; + Block block = new Block(schemaMap.get(taskId), 50 * 1024 * 1024, seq, taskId); + block.setSequenceHasNext(false); + sequenceIds.put(taskId, seq); + Row row; + while ((row = resultSet.getNext()) != null) { + if (!block.addRow(row)) { + block.setSequenceHasNext(true); + break; + } + } + logger.info("Result block num: " + block.getRows().size()); + if (!block.isSequenceHasNext()) { + resultSetMap.remove(taskId); + schemaMap.remove(taskId); + sequenceIds.remove(taskId); + } + return block; + } + + private Block executeSelect(Task task) + { + String taskId = task.getTaskId(); + logger.info("Executing QueryTask " + taskId); if (!resultSetMap.containsKey(taskId)) { PardResultSet pardResultSet = connector.execute(task); @@ -84,4 +197,21 @@ public Block 
executeQuery(Task task) } return block; } + + private int fileLength(File file) + { + int counter = 0; + try { + BufferedReader br = new BufferedReader(new FileReader(file)); + String str = null; + while ((str = br.readLine()) != null) { + counter++; + } + br.close(); + } + catch (IOException e) { + e.printStackTrace(); + } + return counter; + } } diff --git a/pard-main/etc/sbin/pard-server b/pard-main/etc/sbin/pard-server index 10aa381..6927dae 100755 --- a/pard-main/etc/sbin/pard-server +++ b/pard-main/etc/sbin/pard-server @@ -22,7 +22,7 @@ if [ $1 == "start" ] ; then if [ -e ../pid ] ; then echo "Pard already started. Please stop it first." else - nohup java -jar ../bin/pard-server.jar ../pard.properties & + nohup java -jar ../bin/pard-server.jar ../pard.properties >../nohup.log 2>&1 & pid=$! echo ${pid} > ../pid echo "Pard is running in background as" ${pid} diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeFileReceiveHandler.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeFileReceiveHandler.java index 272e264..68eca5b 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeFileReceiveHandler.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeFileReceiveHandler.java @@ -1,6 +1,10 @@ package cn.edu.ruc.iir.pard.exchange; +import cn.edu.ruc.iir.pard.catalog.Column; +import cn.edu.ruc.iir.pard.catalog.Table; +import cn.edu.ruc.iir.pard.etcd.dao.TableDao; import cn.edu.ruc.iir.pard.executor.PardTaskExecutor; +import cn.edu.ruc.iir.pard.executor.connector.CreateTmpTableTask; import cn.edu.ruc.iir.pard.executor.connector.LoadTask; import cn.edu.ruc.iir.pard.executor.connector.PardResultSet; import cn.edu.ruc.iir.pard.executor.connector.Task; @@ -10,10 +14,15 @@ import io.netty.channel.ChannelHandlerContext; import io.netty.channel.ChannelInboundHandlerAdapter; +import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; +import java.io.FileReader; import 
java.io.FileWriter; import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; import java.util.logging.Logger; /** @@ -74,6 +83,33 @@ else if (message.equalsIgnoreCase("OKDONE")) { ChannelFuture future = ctx.writeAndFlush(resultSet.getStatus().toString() + "\n"); future.addListener((ChannelFutureListener) f -> ctx.close()); } + else if (message.equalsIgnoreCase("OKDONESENDDATA")) { + logger.info("File Writer close"); + if (writer != null) { + writer.close(); + } + BufferedReader br = new BufferedReader(new FileReader(new File(this.path))); + String[] header = br.readLine().split("\t"); + logger.info("SEND DATA HEADER: " + header[0] + " " + header[1]); + TableDao tableDao = new TableDao(header[0]); + Table table = tableDao.loadByName(header[2]); + HashMap tableColumn = table.getColumns(); + List columnDefinitions = new ArrayList<>(); + String[] columnNames = br.readLine().split("\t"); + for (String s : columnNames) { + columnDefinitions.add(tableColumn.get(s)); + } + br.close(); + //TODO table name is what? 
+ Task task = new CreateTmpTableTask(header[0], header[1], columnDefinitions, this.path); + //Task task = new CreateTmpTableTask(null, header[1], columnDefinitions, this.path); + PardResultSet resultSet = executor.executeStatus(task); + logger.info("Create tmp table: " + resultSet.getStatus().toString()); + writer = null; + path = null; + ChannelFuture future = ctx.writeAndFlush(resultSet.getStatus().toString() + "\n"); + future.addListener((ChannelFutureListener) f -> ctx.close()); + } else { if (writer != null) { writer.write(message + "\n"); diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeTaskHandler.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeTaskHandler.java index 0314805..a6fd697 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeTaskHandler.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/ExchangeTaskHandler.java @@ -3,11 +3,19 @@ import cn.edu.ruc.iir.pard.executor.PardTaskExecutor; import cn.edu.ruc.iir.pard.executor.connector.Block; import cn.edu.ruc.iir.pard.executor.connector.Task; +import cn.edu.ruc.iir.pard.executor.connector.UnionTask; +import cn.edu.ruc.iir.pard.scheduler.TaskScheduler; +import cn.edu.ruc.iir.pard.scheduler.TaskState; import io.netty.channel.ChannelFuture; import io.netty.channel.ChannelFutureListener; import io.netty.channel.ChannelHandlerContext; import io.netty.channel.ChannelInboundHandlerAdapter; +import net.sf.json.JSONObject; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; @@ -21,16 +29,21 @@ public class ExchangeTaskHandler { private final Logger logger = Logger.getLogger(ExchangeTaskHandler.class.getName()); private final PardTaskExecutor executor; - + private final Map taskMap; + private final Map> subTaskMap; + private final TaskScheduler taskScheduler; public ExchangeTaskHandler(PardTaskExecutor executor) { 
this.executor = executor; + this.taskMap = new HashMap(); + this.subTaskMap = new HashMap<>(); + this.taskScheduler = TaskScheduler.INSTANCE(); } @Override public void channelRead(ChannelHandlerContext ctx, Object msg) { - if (msg instanceof Task) { + if (msg instanceof Task && !(msg instanceof UnionTask)) { Task task = (Task) msg; Block block = executor.executeQuery(task); if (block.isSequenceHasNext()) { @@ -41,12 +54,45 @@ public void channelRead(ChannelHandlerContext ctx, Object msg) f.addListener(ChannelFutureListener.CLOSE); } } + else if (msg instanceof Task && msg instanceof UnionTask) { + unionTask(ctx, (UnionTask) msg); + } else { logger.log(Level.WARNING, "Exchange task handler received a message which is not a task"); ctx.close(); } } - + public void unionTask(ChannelHandlerContext ctx, UnionTask task) + { + //int p = 0; + if (subTaskMap.get(task.getTaskId()) == null) { + List list = new ArrayList(); + TaskState state = taskScheduler.executeQueryTask(task.getWaitTask(), true); + list.add(task.getTaskId()); + subTaskMap.put(task.getTaskId(), list); + taskMap.put(task.getTaskId(), state); + } + TaskState state = taskMap.get(task.getTaskId()); + boolean hasNext = true; + ChannelFuture f = null; + if (!state.isDone()) { + logger.info("waiting more blocks in exchange task handlers."); + logger.info("print task map:"); + for (String key : state.getTaskMap().keySet()) { + logger.info("task map key " + key + JSONObject.fromObject(state.getTaskMap().get(key)).toString()); + } + Block b = state.fetch(); + logger.info("fetch~"); + hasNext = b.isSequenceHasNext() || state.getTaskMap().size() > 1; + b.setSequenceHasNext(hasNext); + f = ctx.write(b); + } + if (!hasNext) { + f.addListener(ChannelFutureListener.CLOSE); + subTaskMap.put(task.getTaskId(), null); + taskMap.put(task.getTaskId(), null); + } + } @Override public void channelReadComplete(ChannelHandlerContext ctx) { diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/PardFileExchangeServer.java 
b/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/PardFileExchangeServer.java index 01cfefe..0843949 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/PardFileExchangeServer.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/exchange/PardFileExchangeServer.java @@ -21,6 +21,7 @@ * * @author guodong */ + public class PardFileExchangeServer implements Runnable { diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ConditionComparator.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ConditionComparator.java index ebf80c4..1972ff4 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ConditionComparator.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ConditionComparator.java @@ -2,9 +2,9 @@ import cn.edu.ruc.iir.pard.catalog.Condition; import cn.edu.ruc.iir.pard.catalog.GddUtil; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage.ErrCode; +import cn.edu.ruc.iir.pard.commons.exception.SemanticException; import cn.edu.ruc.iir.pard.commons.utils.DataType.DataTypeInt; -import cn.edu.ruc.iir.pard.planner.ErrorMessage.ErrCode; -import cn.edu.ruc.iir.pard.semantic.SemanticException; import cn.edu.ruc.iir.pard.sql.tree.BooleanLiteral; import cn.edu.ruc.iir.pard.sql.tree.CharLiteral; import cn.edu.ruc.iir.pard.sql.tree.DoubleLiteral; diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/PardPlanner.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/PardPlanner.java index 1742814..57fbe13 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/PardPlanner.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/PardPlanner.java @@ -1,5 +1,6 @@ package cn.edu.ruc.iir.pard.planner; +import cn.edu.ruc.iir.pard.commons.exception.SemanticException; import cn.edu.ruc.iir.pard.planner.ddl.SchemaCreationPlan; import cn.edu.ruc.iir.pard.planner.ddl.SchemaDropPlan; import cn.edu.ruc.iir.pard.planner.ddl.SchemaShowPlan; @@ -11,7 +12,7 @@ import 
cn.edu.ruc.iir.pard.planner.dml.InsertPlan; import cn.edu.ruc.iir.pard.planner.dml.LoadPlan; import cn.edu.ruc.iir.pard.planner.dml.QueryPlan; -import cn.edu.ruc.iir.pard.semantic.SemanticException; +import cn.edu.ruc.iir.pard.planner.dml.QueryPlan2; import cn.edu.ruc.iir.pard.sql.tree.CreateSchema; import cn.edu.ruc.iir.pard.sql.tree.CreateTable; import cn.edu.ruc.iir.pard.sql.tree.Delete; @@ -40,7 +41,7 @@ public class PardPlanner public static Map ast2plan = new HashMap(); public static void init() { - ast2plan.put(Query.class, QueryPlan.class); + ast2plan.put(Query.class, QueryPlan2.class); ast2plan.put(CreateSchema.class, SchemaCreationPlan.class); ast2plan.put(CreateTable.class, TableCreationPlan.class); ast2plan.put(Use.class, UsePlan.class); diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/Plan.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/Plan.java index 63ac3db..586a94b 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/Plan.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/Plan.java @@ -1,6 +1,7 @@ package cn.edu.ruc.iir.pard.planner; -import cn.edu.ruc.iir.pard.semantic.SemanticException; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; +import cn.edu.ruc.iir.pard.commons.exception.SemanticException; import cn.edu.ruc.iir.pard.sql.tree.Statement; /** @@ -12,17 +13,27 @@ public abstract class Plan { private Statement statement; private String jobId; - + private ErrorMessage msg; public Plan(Statement stmt) { statement = stmt; - ErrorMessage msg = this.semanticAnalysis(); + msg = this.semanticAnalysis(); if (msg.getErrcode() < 0) { System.err.println(msg.toString()); throw new SemanticException(msg); } } + public ErrorMessage getMsg() + { + return msg; + } + + public void setMsg(ErrorMessage msg) + { + this.msg = msg; + } + public Statement getStatment() { return statement; diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/SchemaCreationPlan.java 
b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/SchemaCreationPlan.java index 30d7c70..26ba31d 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/SchemaCreationPlan.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/SchemaCreationPlan.java @@ -1,9 +1,9 @@ package cn.edu.ruc.iir.pard.planner.ddl; import cn.edu.ruc.iir.pard.catalog.Schema; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage.ErrCode; import cn.edu.ruc.iir.pard.etcd.dao.SchemaDao; -import cn.edu.ruc.iir.pard.planner.ErrorMessage; -import cn.edu.ruc.iir.pard.planner.ErrorMessage.ErrCode; import cn.edu.ruc.iir.pard.sql.tree.CreateSchema; import cn.edu.ruc.iir.pard.sql.tree.Statement; diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/SchemaDropPlan.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/SchemaDropPlan.java index 515cb13..14ba8d9 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/SchemaDropPlan.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/SchemaDropPlan.java @@ -1,9 +1,9 @@ package cn.edu.ruc.iir.pard.planner.ddl; import cn.edu.ruc.iir.pard.catalog.Schema; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage.ErrCode; import cn.edu.ruc.iir.pard.etcd.dao.SchemaDao; -import cn.edu.ruc.iir.pard.planner.ErrorMessage; -import cn.edu.ruc.iir.pard.planner.ErrorMessage.ErrCode; import cn.edu.ruc.iir.pard.sql.tree.DropSchema; import cn.edu.ruc.iir.pard.sql.tree.Statement; diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/SchemaShowPlan.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/SchemaShowPlan.java index 1048596..a80ec09 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/SchemaShowPlan.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/SchemaShowPlan.java @@ -1,6 +1,6 @@ package 
cn.edu.ruc.iir.pard.planner.ddl; -import cn.edu.ruc.iir.pard.planner.ErrorMessage; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; import cn.edu.ruc.iir.pard.planner.Plan; import cn.edu.ruc.iir.pard.sql.tree.Statement; diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/TableCreationPlan.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/TableCreationPlan.java index 85f8390..9cd8a95 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/TableCreationPlan.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/TableCreationPlan.java @@ -7,12 +7,12 @@ import cn.edu.ruc.iir.pard.catalog.Schema; import cn.edu.ruc.iir.pard.catalog.Site; import cn.edu.ruc.iir.pard.catalog.Table; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage.ErrCode; import cn.edu.ruc.iir.pard.commons.utils.DataType; import cn.edu.ruc.iir.pard.etcd.dao.SchemaDao; import cn.edu.ruc.iir.pard.etcd.dao.SiteDao; import cn.edu.ruc.iir.pard.etcd.dao.TableDao; -import cn.edu.ruc.iir.pard.planner.ErrorMessage; -import cn.edu.ruc.iir.pard.planner.ErrorMessage.ErrCode; import cn.edu.ruc.iir.pard.sql.tree.ColumnDefinition; import cn.edu.ruc.iir.pard.sql.tree.CreateTable; import cn.edu.ruc.iir.pard.sql.tree.QualifiedName; diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/TableDropPlan.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/TableDropPlan.java index 836503a..9d20924 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/TableDropPlan.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/TableDropPlan.java @@ -3,9 +3,9 @@ import cn.edu.ruc.iir.pard.catalog.Fragment; import cn.edu.ruc.iir.pard.catalog.Schema; import cn.edu.ruc.iir.pard.catalog.Table; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; import cn.edu.ruc.iir.pard.etcd.dao.SchemaDao; import cn.edu.ruc.iir.pard.etcd.dao.TableDao; -import 
cn.edu.ruc.iir.pard.planner.ErrorMessage; import cn.edu.ruc.iir.pard.sql.tree.DropTable; import cn.edu.ruc.iir.pard.sql.tree.QualifiedName; import cn.edu.ruc.iir.pard.sql.tree.Statement; diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/TablePlan.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/TablePlan.java index dc11f01..74071a2 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/TablePlan.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/TablePlan.java @@ -1,6 +1,6 @@ package cn.edu.ruc.iir.pard.planner.ddl; -import cn.edu.ruc.iir.pard.planner.ErrorMessage; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; import cn.edu.ruc.iir.pard.planner.GDDPlan; import cn.edu.ruc.iir.pard.sql.tree.Statement; diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/TableShowPlan.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/TableShowPlan.java index e9cadb1..9541aaf 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/TableShowPlan.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/TableShowPlan.java @@ -1,8 +1,8 @@ package cn.edu.ruc.iir.pard.planner.ddl; import cn.edu.ruc.iir.pard.catalog.Schema; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; import cn.edu.ruc.iir.pard.etcd.dao.SchemaDao; -import cn.edu.ruc.iir.pard.planner.ErrorMessage; import cn.edu.ruc.iir.pard.planner.Plan; import cn.edu.ruc.iir.pard.sql.tree.Identifier; import cn.edu.ruc.iir.pard.sql.tree.ShowTables; diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/UsePlan.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/UsePlan.java index 56d9a3b..5e38080 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/UsePlan.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/ddl/UsePlan.java @@ -1,9 +1,9 @@ package cn.edu.ruc.iir.pard.planner.ddl; import cn.edu.ruc.iir.pard.catalog.Schema; +import 
cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage.ErrCode; import cn.edu.ruc.iir.pard.etcd.dao.SchemaDao; -import cn.edu.ruc.iir.pard.planner.ErrorMessage; -import cn.edu.ruc.iir.pard.planner.ErrorMessage.ErrCode; import cn.edu.ruc.iir.pard.planner.GDDPlan; import cn.edu.ruc.iir.pard.sql.tree.Statement; import cn.edu.ruc.iir.pard.sql.tree.Use; diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/DeletePlan.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/DeletePlan.java index 76ab845..b989be7 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/DeletePlan.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/DeletePlan.java @@ -4,9 +4,9 @@ import cn.edu.ruc.iir.pard.catalog.Fragment; import cn.edu.ruc.iir.pard.catalog.Schema; import cn.edu.ruc.iir.pard.catalog.Table; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; import cn.edu.ruc.iir.pard.etcd.dao.SchemaDao; import cn.edu.ruc.iir.pard.etcd.dao.TableDao; -import cn.edu.ruc.iir.pard.planner.ErrorMessage; import cn.edu.ruc.iir.pard.planner.Plan; import cn.edu.ruc.iir.pard.planner.ddl.UsePlan; import cn.edu.ruc.iir.pard.sql.expr.ColumnItem; diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/InsertPlan.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/InsertPlan.java index 8a89407..ef25464 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/InsertPlan.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/InsertPlan.java @@ -6,11 +6,11 @@ import cn.edu.ruc.iir.pard.catalog.GddUtil; import cn.edu.ruc.iir.pard.catalog.Schema; import cn.edu.ruc.iir.pard.catalog.Table; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage.ErrCode; import cn.edu.ruc.iir.pard.etcd.dao.SchemaDao; import cn.edu.ruc.iir.pard.etcd.dao.TableDao; import 
cn.edu.ruc.iir.pard.planner.ConditionComparator; -import cn.edu.ruc.iir.pard.planner.ErrorMessage; -import cn.edu.ruc.iir.pard.planner.ErrorMessage.ErrCode; import cn.edu.ruc.iir.pard.planner.Plan; import cn.edu.ruc.iir.pard.planner.ddl.UsePlan; import cn.edu.ruc.iir.pard.sql.tree.BooleanLiteral; diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/LoadPlan.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/LoadPlan.java index f553bf8..6c0dbec 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/LoadPlan.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/LoadPlan.java @@ -6,11 +6,11 @@ import cn.edu.ruc.iir.pard.catalog.GddUtil; import cn.edu.ruc.iir.pard.catalog.Schema; import cn.edu.ruc.iir.pard.catalog.Table; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; import cn.edu.ruc.iir.pard.commons.utils.DataType; import cn.edu.ruc.iir.pard.etcd.dao.SchemaDao; import cn.edu.ruc.iir.pard.etcd.dao.TableDao; import cn.edu.ruc.iir.pard.planner.ConditionComparator; -import cn.edu.ruc.iir.pard.planner.ErrorMessage; import cn.edu.ruc.iir.pard.planner.Plan; import cn.edu.ruc.iir.pard.planner.ddl.UsePlan; import cn.edu.ruc.iir.pard.sql.tree.Load; diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/QueryPlan.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/QueryPlan.java index 6c6cd5f..ae79d1f 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/QueryPlan.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/QueryPlan.java @@ -3,6 +3,7 @@ import cn.edu.ruc.iir.pard.catalog.Column; import cn.edu.ruc.iir.pard.catalog.Fragment; import cn.edu.ruc.iir.pard.catalog.Schema; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; import cn.edu.ruc.iir.pard.etcd.dao.SchemaDao; import cn.edu.ruc.iir.pard.etcd.dao.SiteDao; import cn.edu.ruc.iir.pard.etcd.dao.TableDao; @@ -17,7 +18,6 @@ import cn.edu.ruc.iir.pard.executor.connector.node.TableScanNode; import 
cn.edu.ruc.iir.pard.executor.connector.node.UnionNode; import cn.edu.ruc.iir.pard.planner.EarlyStopPlan; -import cn.edu.ruc.iir.pard.planner.ErrorMessage; import cn.edu.ruc.iir.pard.planner.Plan; import cn.edu.ruc.iir.pard.planner.ddl.UsePlan; import cn.edu.ruc.iir.pard.sql.expr.ColumnItem; diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/QueryPlan2.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/QueryPlan2.java new file mode 100644 index 0000000..579b2ef --- /dev/null +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/QueryPlan2.java @@ -0,0 +1,887 @@ +package cn.edu.ruc.iir.pard.planner.dml; + +import cn.edu.ruc.iir.pard.catalog.Column; +import cn.edu.ruc.iir.pard.catalog.Condition; +import cn.edu.ruc.iir.pard.catalog.Fragment; +import cn.edu.ruc.iir.pard.catalog.GddUtil; +import cn.edu.ruc.iir.pard.catalog.Schema; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage.ErrCode; +import cn.edu.ruc.iir.pard.commons.exception.SemanticException; +import cn.edu.ruc.iir.pard.etcd.dao.SchemaDao; +import cn.edu.ruc.iir.pard.etcd.dao.SiteDao; +import cn.edu.ruc.iir.pard.etcd.dao.TableDao; +import cn.edu.ruc.iir.pard.executor.connector.node.DistinctNode; +import cn.edu.ruc.iir.pard.executor.connector.node.FilterNode; +import cn.edu.ruc.iir.pard.executor.connector.node.JoinNode; +import cn.edu.ruc.iir.pard.executor.connector.node.LimitNode; +import cn.edu.ruc.iir.pard.executor.connector.node.NodeHelper; +import cn.edu.ruc.iir.pard.executor.connector.node.OutputNode; +import cn.edu.ruc.iir.pard.executor.connector.node.PlanNode; +import cn.edu.ruc.iir.pard.executor.connector.node.ProjectNode; +import cn.edu.ruc.iir.pard.executor.connector.node.SortNode; +import cn.edu.ruc.iir.pard.executor.connector.node.TableScanNode; +import cn.edu.ruc.iir.pard.executor.connector.node.UnionNode; +import cn.edu.ruc.iir.pard.planner.EarlyStopPlan; +import 
cn.edu.ruc.iir.pard.planner.ddl.UsePlan; +import cn.edu.ruc.iir.pard.sql.expr.ColumnItem; +import cn.edu.ruc.iir.pard.sql.expr.Expr; +import cn.edu.ruc.iir.pard.sql.expr.Expr.LogicOperator; +import cn.edu.ruc.iir.pard.sql.expr.FalseExpr; +import cn.edu.ruc.iir.pard.sql.expr.SingleExpr; +import cn.edu.ruc.iir.pard.sql.expr.TrueExpr; +import cn.edu.ruc.iir.pard.sql.parser.SqlParser; +import cn.edu.ruc.iir.pard.sql.tree.AllColumns; +import cn.edu.ruc.iir.pard.sql.tree.ComparisonExpression; +import cn.edu.ruc.iir.pard.sql.tree.DereferenceExpression; +import cn.edu.ruc.iir.pard.sql.tree.Expression; +import cn.edu.ruc.iir.pard.sql.tree.Identifier; +import cn.edu.ruc.iir.pard.sql.tree.Join; +import cn.edu.ruc.iir.pard.sql.tree.Query; +import cn.edu.ruc.iir.pard.sql.tree.QueryBody; +import cn.edu.ruc.iir.pard.sql.tree.QuerySpecification; +import cn.edu.ruc.iir.pard.sql.tree.Relation; +import cn.edu.ruc.iir.pard.sql.tree.Select; +import cn.edu.ruc.iir.pard.sql.tree.SelectItem; +import cn.edu.ruc.iir.pard.sql.tree.SingleColumn; +import cn.edu.ruc.iir.pard.sql.tree.SortItem; +import cn.edu.ruc.iir.pard.sql.tree.Statement; +import cn.edu.ruc.iir.pard.sql.tree.Table; +import cn.edu.ruc.iir.pard.web.PardServlet; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.logging.Logger; + +/** + * Query Plan + * A query statement can be translated to a query plan. 
+ * @author hagen + */ +public class QueryPlan2 + extends QueryPlan implements EarlyStopPlan +{ + private Logger logger = Logger.getLogger(QueryPlan2.class.getName()); + private PlanNode node; + private boolean alreadyDone; + private Optional limit; + private Optional sort; + private Optional distinct; + private ProjectNode project; + private Optional join; + private Optional filter; + + // for projection + private Map alias2col; // 列名别名与列的对应 + private Map colAlias2tbl; // 列名的别名与表的对应关系 + private Map alias2tbl; + private List colList; // projection中列的顺序 + + // for from + private Map fullColAlias2col; // 使用到的所有的表的列 key: tbl.col[string] value: col[Column] + private Map col2tbl; //使用到的所有的表的列 key: tbl.col[string] value: tbl[string] + private Map> col2tblList; + private Map tbl2schema; //使用到的表与schema的对应关系 + private Map catalog; // 使用到的所有表 + private Map tableDaoMap; + //for site + private Map> tbl2site; + + // private UnionNode union; + private static SiteDao siteDao; + private boolean siteMayMissing; + private List aliveSite; + + private List tableList; // 使用的表的顺序 + + private boolean selectPushDown; + public QueryPlan2(Statement stmt) + { + super(stmt); + } + private QueryPlan2(PlanNode p, int k) + { + super(new SqlParser().createStatement("select * from customer where rank =" + k)); + node = p; + } + public PlanNode getPlan() + { + return node; + } + + @Override + public ErrorMessage semanticAnalysis() + { + siteDao = new SiteDao(); + aliveSite = new ArrayList(); + aliveSite.addAll(siteDao.listNodes().keySet()); + siteMayMissing = false; + catalog = new HashMap<>(); + alias2col = new HashMap(); + colAlias2tbl = new HashMap(); + tbl2schema = new HashMap(); + col2tbl = new HashMap(); + tableList = new ArrayList<>(); + colList = new ArrayList<>(); + logger = Logger.getLogger(QueryPlan2.class.getName()); + node = new OutputNode(); + selectPushDown = false; + tbl2site = new HashMap<>(); + fullColAlias2col = new HashMap<>(); + col2tblList = new HashMap<>(); + tableDaoMap = 
new HashMap<>(); + + ColumnItem.clearCol2TblMap(); + alias2tbl = ColumnItem.getCol2TblMap(); + this.limit = Optional.ofNullable(null); + this.sort = Optional.ofNullable(null); + this.distinct = Optional.ofNullable(null); + this.project = null; + this.filter = Optional.ofNullable(null); + this.join = Optional.ofNullable(null); + // this.union = null; + PlanNode currentNode = this.node; + //System.out.println("currentNode" + currentNode); + // get real objects + Query query = (Query) this.getStatment(); + QueryBody queryBody = query.getQueryBody(); + if (!(queryBody instanceof QuerySpecification)) { + return ErrorMessage.throwMessage(ErrorMessage.ErrCode.UnSupportedQuery, "Query Body is not a query specification!"); + } + QuerySpecification querySpecification = (QuerySpecification) queryBody; + Select select = querySpecification.getSelect(); + if (querySpecification.getGroupBy().isPresent()) { + return ErrorMessage.throwMessage(ErrorMessage.ErrCode.UnSupportedQuery, " Group by not supported at present!"); + } + if (querySpecification.getHaving().isPresent()) { + return ErrorMessage.throwMessage(ErrorMessage.ErrCode.UnSupportedQuery, " Having is not supported!"); + } + if (!querySpecification.getFrom().isPresent()) { + return ErrorMessage.throwMessage(ErrorMessage.ErrCode.UnSupportedQuery, " FROM is missing!"); + } + //check for schema + String schemaName = null; + //boolean checkSchema = false; + Schema schema = null; + schema = UsePlan.getCurrentSchema(); + if (schema != null) { + schemaName = schema.getName(); + //checkSchema = true; + } + // check for projection + //check for table + Relation from = querySpecification.getFrom().get(); + checkRelation(from, schemaName, schema); + fillAlias2tbl(); + currentNode = checkLimit(query, currentNode); + currentNode = checkOrderBy(query, currentNode); + currentNode = checkProjectionAndDistinct(select, currentNode); + currentNode = checkFilter(querySpecification, currentNode); + if (tableList.size() == 1) { + PlanNode 
unionNode = localization(tableList.get(0)); + currentNode.setChildren(unionNode, true, true); + currentNode = unionNode; + } + else { + PlanNode joinNode = join(); + joinNode = formatUnion(joinNode); + currentNode.setChildren(joinNode, true, true); + currentNode = joinNode; + } + logger.info("Parsed query plan: " + this.node.toString()); + if (!siteMayMissing) { + return ErrorMessage.getOKMessage(); + } + else { + return ErrorMessage.throwMessage(ErrCode.SomeSiteDown); + } + } + public void fillAlias2tbl() + { + for (String key : col2tblList.keySet()) { + List tb = col2tblList.get(key); + if (tb.size() == 1) { + alias2tbl.put(key, tb.get(0)); + } + } + } + public PlanNode formatUnion(PlanNode node) + { + if (node instanceof UnionNode) { + boolean needRec = false; + UnionNode union = new UnionNode(); + UnionNode old = (UnionNode) node; + for (PlanNode p : old.getUnionChildren()) { + if (p instanceof UnionNode) { + UnionNode pp = (UnionNode) p; + needRec = true; + for (PlanNode n : pp.getUnionChildren()) { + union.addUnionChild(n); + } + } + else { + union.addUnionChild(p); + } + } + if (needRec) { + return formatUnion(union); + } + return union; + } + else { + return NodeHelper.copyNode(node); + } + } + //TODO:根据Expr里的条件,确定Join树 + public PlanNode join() + { + /* + JoinNode join = new JoinNode(); + for (String tableName : tableList) { + PlanNode node = localization(tableName); + if (node != null) { + join.addJoinChild(node); + } + else { + return null; + } + } + List singleExprs = Expr.extractTableJoinExpr(Expr.parse(filter.get().getExpression())); + for (SingleExpr s : singleExprs) { + join.getExprList().add((ComparisonExpression) s.toExpression()); + } + return join; + */ + Set unjoinTable = new HashSet(); + unjoinTable.addAll(tableList); + PlanNode p = localization(tableList.get(0)); + Set joinTable = new HashSet(); + joinTable.add(tableList.get(0)); + unjoinTable.remove(tableList.get(0)); + List singleExprs = 
Expr.extractTableJoinExpr(Expr.parse(filter.get().getExpression())); + // System.out.println(singleExprs); + while (unjoinTable.size() > 0) { + String waitJoinTableName = null; + SingleExpr joinExpr = null; + for (String t1 : joinTable) { + for (String t2 : unjoinTable) { + for (SingleExpr se : singleExprs) { + ColumnItem ci1 = (ColumnItem) se.getLvalue(); + ColumnItem ci2 = (ColumnItem) se.getRvalue(); + String t11 = ci1.getTableName(); + String t22 = ci2.getTableName(); + //System.out.println("t11" + t11 + " t22 " + t22 + " t1 " + t1 + "t2 " + t2); + if ((t11.equalsIgnoreCase(t1) && t22.equalsIgnoreCase(t2)) || (t11.equalsIgnoreCase(t2) && t22.equalsIgnoreCase(t1))) { + waitJoinTableName = t2; + joinExpr = se; + break; + } + } + } + } + if (joinExpr != null && waitJoinTableName != null) { + joinTable.add(waitJoinTableName); + unjoinTable.remove(waitJoinTableName); + singleExprs.remove(joinExpr); + JoinNode jn = new JoinNode(); + jn.addJoinChild(p); + jn.addJoinChild(localization(waitJoinTableName)); + p = jn; + jn.getExprList().add((ComparisonExpression) joinExpr.toExpression()); + } + else { + throw new SemanticException(ErrCode.UnSupportedQuery, "Not Equal Join"); + } + } + if (p instanceof JoinNode) { + //return p; + return pushDownJoin((JoinNode) p); + } + else { + return p; + } + } + private int cnt = 0; + public PlanNode pushDownJoin(JoinNode node) + { + int k = cnt++; + node = (JoinNode) NodeHelper.copyNode(node); + //new QueryPlan2(node, k).afterExecution(true); + if (node.getExprList().isEmpty()) { + PlanNode o = NodeHelper.copyNode(node); + //new QueryPlan2(o, 3000 + k).afterExecution(true); + return o; + } + JoinNode oldNode = node; + List joinChildren = node.getJoinChildren(); + if (joinChildren.size() == 1) { + PlanNode p = NodeHelper.copyNode(joinChildren.get(0)); + //new QueryPlan2(p, 1000 + k).afterExecution(true); + return p; + } + else if (joinChildren.size() == 2) { + int unionIndex = -1; + for (int i = 0; i < joinChildren.size(); i++) { + 
PlanNode chd = joinChildren.get(i); + if (chd instanceof UnionNode) { + // UnionNode cu = (UnionNode) chd; + unionIndex = i; + break; + } + } + if (unionIndex >= 0) { + UnionNode union = (UnionNode) joinChildren.get(unionIndex); + PlanNode other = joinChildren.get(1 - unionIndex); + PlanNode o = pushDownJoin(union, other, oldNode); + //new QueryPlan2(o, 2000 + k).afterExecution(true); + return o; + } + else { + return checkPruneJoin(node); + } + } + return NodeHelper.copyNode(node); + } + + public PlanNode checkPruneJoin(JoinNode node) + { + node.setOtherInfo("mark"); + if (node.getJoinChildren().size() != 2) { + node.setOtherInfo("mark" + node.getJoinChildren().size()); + return node; + } + Expression expr = node.getExprList().get(0); + FilterNode node1 = findExpression(node.getJoinChildren().get(0)); + FilterNode node2 = findExpression(node.getJoinChildren().get(1)); + if (node1 == null || node2 == null) { + return node; + } + Expression expr1 = node1.getExpression(); + Expression expr2 = node2.getExpression(); + if (expr != null && expr1 != null && expr2 != null) { + SingleExpr se = (SingleExpr) Expr.parse(expr); + Expr e1 = Expr.parse(expr1); + Expr e2 = Expr.parse(expr2); + ColumnItem lv = (ColumnItem) se.getLvalue(); + ColumnItem rv = (ColumnItem) se.getRvalue(); + e1 = Expr.replace(e1, (ColumnItem) se.getLvalue(), (ColumnItem) se.getRvalue()); + e2 = Expr.replace(e2, (ColumnItem) se.getLvalue(), (ColumnItem) se.getRvalue()); + Expr res = Expr.and(e1, e2, LogicOperator.AND); + if (res instanceof FalseExpr) { + //System.out.println("Flase e1 " + e1.toString() + " e2 " + e2.toString() + "se " + se + " res " + res); + return null; + } + else { + //System.out.println("ke1 " + e1.toString() + " e2 " + e2.toString() + "se " + se + " res " + res); + } + return node; + } + else { + //System.out.println("?e1 " + expr1 + " e2 " + expr2 + "se " + expr); + } + return node; + } + public FilterNode findExpression(PlanNode node) + { + while (!(node instanceof FilterNode)) { 
+ if (node instanceof UnionNode || node instanceof JoinNode) { + return null; + } + if (node == null) { + return null; + } + node = node.getLeftChild(); + } + if (node instanceof FilterNode) { + return ((FilterNode) node); + } + return null; + } + public PlanNode pushDownJoin(UnionNode union, PlanNode others, JoinNode oldNode) + { + // 考虑union为0和1时 + UnionNode ret = new UnionNode(); + List children = union.getUnionChildren(); + if (others instanceof JoinNode) { + others = pushDownJoin((JoinNode) others); + if (others == null) { + return null; + } + } + if (children.isEmpty()) { + return union; + } + for (PlanNode node : children) { + JoinNode joins = new JoinNode(); + joins.addJoinChild(NodeHelper.copyNode(node)); + joins.addJoinChild(NodeHelper.copyNode(others)); + joins.getJoinSet().addAll(oldNode.getJoinSet()); + joins.getExprList().addAll(oldNode.getExprList()); + PlanNode pn = pushDownJoin(joins); + if (pn != null) { + ret.getUnionChildren().add(pn); + } + //ret.getUnionChildren().add(joins); + //TODO 进行搜索 如果没有Union结点,且没有带expr的join结点,则考虑减枝 + } + if (ret.getUnionChildren().size() == 0) { + return ret; + } + else if (ret.getUnionChildren().size() == 1) { + return ret.getUnionChildren().get(0); + } + else { + return ret; + } + } + public PlanNode localization(String fromTableName) + { + cn.edu.ruc.iir.pard.catalog.Table catalogTable = catalog.get(fromTableName); + if (catalogTable.getFragment().values().iterator().next().getFragmentType() == GddUtil.fragementHORIZONTIAL) { + UnionNode union = horizonLocalization(fromTableName); + if (union.getUnionChildren().isEmpty()) { + return null; + } + return union; + } + else { + JoinNode join = verticalLocalization(fromTableName); + if (join.getJoinChildren().isEmpty()) { + return null; + } + return join; + } + } + + public PlanNode checkFilter(QuerySpecification querySpecification, PlanNode currentNode) + { + if (querySpecification.getWhere().isPresent()) { + Expression filterExpr = querySpecification.getWhere().get(); + 
FilterNode filterNode = new FilterNode(filterExpr); + filter = Optional.of(filterNode); + currentNode.setChildren(filterNode, true, true); + currentNode = filterNode; + } + return currentNode; + } + public PlanNode checkProjectionAndDistinct(Select select, PlanNode currentNode) + { + List selectItems = select.getSelectItems(); + //List columns = new ArrayList<>(); + for (SelectItem selectItem : selectItems) { + if (selectItem instanceof AllColumns) { + colList.clear(); + selectPushDown = true; + colList.addAll(fullColAlias2col.values()); + colAlias2tbl.clear(); + alias2col.clear(); + alias2tbl.clear(); + for (String table : tableList) { + cn.edu.ruc.iir.pard.catalog.Table catalogTable = catalog.get(table); + for (Column c : catalogTable.getColumns().values()) { + colAlias2tbl.put(c.getColumnName(), catalogTable); + alias2col.put(c.getColumnName(), c); + alias2tbl.put(c.getColumnName(), catalogTable.getTablename()); + } + } + + break; + } + else { + Expression expression = ((SingleColumn) selectItem).getExpression(); + if (expression instanceof Identifier) { + //throw new SemanticException(ErrorMessage.ErrCode.UnSupportedQuery, "expession " + expression.toString() + " is not supported! 
class " + expression.getClass().getName()); + //TODO: 处理 + Identifier identifier = (Identifier) expression; + String alias = identifier.getValue(); + List tblColLists1 = col2tblList.get(alias); + if (tblColLists1 == null) { + throw new SemanticException(ErrCode.ColumnInTableNotFound, identifier.getValue(), " multi-table"); + } + else if (tblColLists1.size() >= 2) { + throw new SemanticException(ErrCode.ColumnNameIsAmbiguous, identifier.getValue(), tblColLists1.get(0) + " and " + tblColLists1.get(1)); + } + cn.edu.ruc.iir.pard.catalog.Table catalogTable = catalog.get(tblColLists1.get(0)); + if (catalogTable.getColumns().keySet().contains(identifier.getValue())) { + Column col = catalogTable.getColumns().get(identifier.getValue()); + colList.add(col); + if (colAlias2tbl.get(alias) != null) { + throw new SemanticException(ErrCode.ColumnNameIsAmbiguous, identifier.getValue(), colAlias2tbl.get(alias).getTablename() + " and " + catalogTable.getTablename()); + } + colAlias2tbl.put(alias, catalogTable); + alias2col.put(alias, col); + alias2tbl.put(alias, catalogTable.getTablename()); + } + else { + throw new SemanticException(ErrCode.ColumnInTableNotFound, identifier.getValue(), catalogTable); + } + } + else if (expression instanceof DereferenceExpression) { + DereferenceExpression exp = (DereferenceExpression) expression; + Identifier identifier = exp.getField(); + String tblName = exp.getBase().toString(); + String alias = exp.toString(); + cn.edu.ruc.iir.pard.catalog.Table catalogTable = catalog.get(tblName.toLowerCase()); + if (catalogTable.getColumns().keySet().contains(identifier.getValue())) { + Column col = catalogTable.getColumns().get(identifier.getValue()); + colList.add(col); + if (colAlias2tbl.get(alias) != null) { + throw new SemanticException(ErrCode.ColumnNameIsAmbiguous, identifier.getValue(), colAlias2tbl.get(alias).getTablename() + " and " + catalogTable.getTablename()); + } + colAlias2tbl.put(alias, catalogTable); + alias2col.put(alias, col); + 
alias2tbl.put(alias, catalogTable.getTablename()); + } + } + else { + throw new SemanticException(ErrorMessage.ErrCode.UnSupportedQuery, "expession " + expression.toString() + " is not supported! class " + expression.getClass().getName()); + } + } + } + if (select.isDistinct()) { + DistinctNode distinctNode = new DistinctNode(colList); + distinct = Optional.of(distinctNode); + currentNode.setChildren(distinctNode, true, true); + currentNode = distinctNode; + } + ProjectNode projectNode = new ProjectNode(colList); + currentNode.setChildren(projectNode, true, true); + project = projectNode; + currentNode = projectNode; + return currentNode; + } + public PlanNode checkOrderBy(Query query, PlanNode currentNode) + { + if (query.getOrderBy().isPresent()) { + SortNode sortNode = new SortNode(); + for (SortItem sortItem : query.getOrderBy().get().getSortItems()) { + Identifier sortKey = (Identifier) sortItem.getSortKey(); + if (col2tbl.get(sortKey.getValue()) != null) { + Column sortCol = catalog.get(col2tbl.get(sortKey.getValue())).getColumns().get(sortKey.getValue()); + sortNode.addSort(sortCol, sortItem.getOrdering() == SortItem.Ordering.ASCENDING); + } + else { + throw new SemanticException(ErrorMessage.ErrCode.ColumnInTableNotFound); + } + } + currentNode.setChildren(sortNode, true, true); + sort = Optional.of(sortNode); + currentNode = sortNode; + } + return currentNode; + } + public PlanNode checkLimit(Query query, PlanNode currentNode) + { + if (query.getLimit().isPresent()) { + int limitVal; + try { + limitVal = Integer.parseInt(query.getLimit().get()); + } + catch (Exception e) { + throw new SemanticException(ErrorMessage.ErrCode.LimitIsNotANumber); + } + LimitNode limitNode = new LimitNode(limitVal); + limit = Optional.of(limitNode); + currentNode.setChildren(limitNode, true, true); + currentNode = limitNode; + } + return currentNode; + } + public void checkRelation(Relation from, String defaultSchema, Schema schema) + { + if (from instanceof Table) { + Table 
fromTable = (Table) from; + checkTable(fromTable, defaultSchema, schema); + } + else if (from instanceof Join) { + Join join = (Join) from; + checkJoin(join, defaultSchema, schema); + } + else { + throw new SemanticException(ErrorMessage.ErrCode.UnSupportedQuery, " FROM clause " + from.getClass().getName() + " is not supported!!"); + } + } + public void checkJoin(Join join, String defaultSchema, Schema schema) + { + Relation lr = join.getLeft(); + Relation rr = join.getRight(); + checkRelation(lr, defaultSchema, schema); + checkRelation(rr, defaultSchema, schema); + } + public cn.edu.ruc.iir.pard.catalog.Table checkTable(Table fromTable, String defaultSchema, Schema schema) + { + boolean checkSchema = (defaultSchema != null); + String schemaName = defaultSchema; + if (fromTable.getName().getPrefix().isPresent()) { + schemaName = fromTable.getName().getPrefix().get().toString(); + checkSchema = false; + } + String fromTableName = fromTable.getName().getSuffix(); + if (schemaName == null) { + throw new SemanticException(ErrorMessage.ErrCode.SchemaNotSpecified); + } + if (!checkSchema) { + SchemaDao schemaDao = new SchemaDao(); + schema = schemaDao.loadByName(schemaName); + if (schema == null) { + throw new SemanticException(ErrorMessage.ErrCode.SchemaNotExsits, schemaName); + } + } + List siteList = new ArrayList(); + TableDao tableDao = tableDaoMap.get(schemaName); + if (tableDao == null) { + tableDao = new TableDao(schema); + tableDaoMap.put(schemaName, tableDao); + } + // check table + int pos = fromTableName.indexOf("@"); + boolean needLoad = false; + if (pos > 0) { + String site = fromTableName.substring(pos + 1); + if (!aliveSite.contains(site)) { + throw new SemanticException(ErrorMessage.ErrCode.SiteNotExist, site); + } + siteList.add(site); + fromTableName = fromTableName.substring(0, pos); + } + else { + needLoad = true; + } + cn.edu.ruc.iir.pard.catalog.Table catalogTable = tableDao.loadByName(fromTableName); + if (catalogTable == null) { + throw new 
SemanticException(ErrorMessage.ErrCode.TableNotExists, schemaName + "." + fromTableName); + } + // check site + tbl2site.put(fromTableName, siteList); + if (needLoad) { + for (Fragment frag : catalogTable.getFragment().values()) { + siteList.add(frag.getSiteName()); + if (!aliveSite.contains(frag.getSiteName())) { + siteMayMissing = true; + } + } + if (siteList.isEmpty()) { + throw new SemanticException(ErrorMessage.ErrCode.AllSiteDown, fromTableName); + } + } + for (Column col : catalogTable.getColumns().values()) { + col2tbl.put(fromTableName + "." + col.getColumnName(), fromTableName); + fullColAlias2col.put(fromTableName + "." + col.getColumnName(), col); + List tblList = col2tblList.get(col.getColumnName()); + if (tblList == null) { + tblList = new ArrayList(); + col2tblList.put(col.getColumnName(), tblList); + } + tblList.add(catalogTable.getTablename()); + } + //prepared for next step. + catalog.put(fromTableName, catalogTable); + tbl2schema.put(fromTableName, schemaName); + tableList.add(fromTableName); + return catalogTable; + } + public JoinNode verticalLocalization(String fromTableName) + { + List siteList = tbl2site.get(fromTableName); + cn.edu.ruc.iir.pard.catalog.Table catalogTable = catalog.get(fromTableName); + JoinNode joinNode = new JoinNode(); + Set tblCol = new HashSet(); + tblCol.addAll(extractColumnNameFromProjection(catalogTable)); + tblCol.addAll(extractColumnNameFromFilter(catalogTable)); + Map cntCol = new HashMap(); + for (Fragment frag : catalogTable.getFragment().values()) { + if (!siteList.contains(frag.getSiteName())) { + continue; + } + List projectColumn = new ArrayList(); + List strColumn = new ArrayList(); + for (Condition cond : frag.getCondition()) { + Integer cnt = cntCol.get(cond.getColumnName()); + if (cnt == null) { + cnt = 0; + } + cnt++; + cntCol.put(cond.getColumnName(), cnt); + Column col = catalogTable.getColumns().get(cond.getColumnName()); + if (tblCol.contains(col.getColumnName())) { + projectColumn.add(col); + 
strColumn.add(col.getColumnName()); + } + } + if (projectColumn.isEmpty()) { + continue; + } + PlanNode childrenNode = null; + PlanNode root = null; + ProjectNode proj = new ProjectNode(projectColumn); + childrenNode = proj; + root = proj; + if (filter.isPresent()) { + // TODO : 垂直投影filter下推 + Expr filterExpr = Expr.parse(filter.get().getExpression()); + filterExpr = Expr.extractTableFilter(filterExpr, fromTableName); + filterExpr = Expr.extractTableColumnFilter(filterExpr, strColumn); + if (!(filterExpr instanceof TrueExpr)) { + FilterNode subFilter = new FilterNode(filterExpr.toExpression()); + childrenNode.setChildren(subFilter, true, true); + childrenNode = subFilter; + } + } + TableScanNode scan = new TableScanNode(tbl2schema.get(fromTableName), fromTableName, frag.getSiteName()); + childrenNode.setChildren(scan, true, true); + joinNode.addJoinChild(root); + } + String mCol = ""; + int maxC = -1; + for (String key : cntCol.keySet()) { + Integer v = cntCol.get(key); + if (v > maxC) { + maxC = v; + mCol = key; + } + } + joinNode.getJoinSet().add(mCol); + return formatVerticalJoin(joinNode); + } + public JoinNode formatVerticalJoin(JoinNode node) + { + JoinNode join = new JoinNode(); + join.getJoinSet().addAll(node.getJoinSet()); + for (PlanNode p : node.getJoinChildren()) { + if (p instanceof ProjectNode) { + ProjectNode pn = (ProjectNode) p; + boolean contains = true; + for (Column c : pn.getColumns()) { + if (!join.getJoinSet().contains(c.getColumnName())) { + contains = false; + break; + } + } + if (contains) { + continue; + } + } + join.addJoinChild(p); + } + if (join.getJoinChildren().isEmpty() && !node.getJoinChildren().isEmpty()) { + join.getJoinChildren().add(node.getJoinChildren().get(0)); + } + return join; + } + public List extractColumnNameFromFilter(cn.edu.ruc.iir.pard.catalog.Table table) + { + String tblName = table.getTablename(); + List list = new ArrayList(); + if (filter.isPresent()) { + Expr expr = Expr.parse(filter.get().getExpression()); + 
return Expr.extractTableColumn(expr, tblName); + } + return list; + } + public List extractColumnNameFromProjection(cn.edu.ruc.iir.pard.catalog.Table table) + { + List arrayList = new ArrayList(); + for (String key : colAlias2tbl.keySet()) { + cn.edu.ruc.iir.pard.catalog.Table t = colAlias2tbl.get(key); + if (t.getTablename().equals(table.getTablename())) { + String col = alias2col.get(key).getColumnName(); + arrayList.add(col); + } + } + return arrayList; + } + public UnionNode horizonLocalization(String fromTableName) + { + //TableDao tdao = tableDaoMap.get(tbl2schema.get(fromTableName)); + List siteList = tbl2site.get(fromTableName); + UnionNode unionNode = new UnionNode(); + cn.edu.ruc.iir.pard.catalog.Table catalogTable = catalog.get(fromTableName); + //union = unionNode; + for (Fragment frag : catalogTable.getFragment().values()) { + if (!siteList.contains(frag.getSiteName())) { + continue; + } + Expr expr = Expr.parse(frag.getCondition(), fromTableName); + PlanNode childrenNode = new TableScanNode(tbl2schema.get(fromTableName), fromTableName, frag.getSiteName()); + if (filter.isPresent()) { + //TODO: 从expr2中选择自己的 + Expr expr2 = Expr.extractTableFilter(Expr.parse(filter.get().getExpression()), fromTableName); + Expr merge = Expr.and(expr, expr2, LogicOperator.AND); + if (merge instanceof TrueExpr) { + // do nothing. 
+ } + else if (merge instanceof FalseExpr) { + continue; + } + else { + //merge = Expr.and(expr, expr2, LogicOperator.OR); + FilterNode childrenFilter = new FilterNode(merge.toExpression()); + childrenFilter.setChildren(childrenNode, true, true); + childrenNode = childrenFilter; + } + } + if (project != null) { + //TODO: 选择自己表里元素下推 + Set tblCol = new HashSet(); + tblCol.addAll(extractColumnNameFromProjection(catalogTable)); + tblCol.addAll(extractColumnNameFromFilter(catalogTable)); + List singleTableProjection = new ArrayList(); + //System.out.println(tblCol); + for (Column col : catalogTable.getColumns().values()) { + if (tblCol.contains(col.getColumnName())) { + singleTableProjection.add(col); + //System.out.println("table " + fromTableName + " add projection " + col.getColumnName()); + } + else { + //System.out.println("table " + fromTableName + " not add projection " + col.getColumnName()); + } + } + ProjectNode pnode = new ProjectNode(singleTableProjection); + pnode.setChildren(childrenNode, true, true); + childrenNode = pnode; + } + else { + ProjectNode pnode = new ProjectNode(new ArrayList(catalogTable.getColumns().values())); + pnode.setChildren(childrenNode, true, true); + childrenNode = pnode; + } + if (distinct.isPresent()) { + //TODO: 选择自己表里元素distinct + DistinctNode dnode = new DistinctNode(distinct.get().getColumns()); + dnode.setChildren(childrenNode, true, true); + childrenNode = dnode; + } + unionNode.addUnionChild(childrenNode); + } + if (unionNode.getUnionChildren().isEmpty()) { + //this.alreadyDone = true; + } + return unionNode; + } + public PlanNode optimize() + { + return node; + } + + @Override + public boolean isAlreadyDone() + { + return alreadyDone; + } + + public HashMap getDistributionHints() + { + return new HashMap<>(); + } + @Override + public boolean afterExecution(boolean executeSuccess) + { + PardServlet.planList.add(this); + return true; + } +} diff --git 
a/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/QueryTestPlan.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/QueryTestPlan.java new file mode 100644 index 0000000..ecd7dbf --- /dev/null +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/planner/dml/QueryTestPlan.java @@ -0,0 +1,27 @@ +package cn.edu.ruc.iir.pard.planner.dml; + +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; +import cn.edu.ruc.iir.pard.executor.connector.node.PlanNode; +import cn.edu.ruc.iir.pard.sql.parser.SqlParser; + +public class QueryTestPlan + extends QueryPlan +{ + private PlanNode node; + public QueryTestPlan(PlanNode node, String info) + { + super(new SqlParser().createStatement("select * from " + info)); + this.node = node; + } + + @Override + public ErrorMessage semanticAnalysis() + { + return new ErrorMessage(); + } + + public PlanNode getPlan() + { + return node; + } +} diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/scheduler/QueryJobExecutor.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/scheduler/QueryJobExecutor.java new file mode 100644 index 0000000..92b1846 --- /dev/null +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/scheduler/QueryJobExecutor.java @@ -0,0 +1,142 @@ +package cn.edu.ruc.iir.pard.scheduler; + +import cn.edu.ruc.iir.pard.catalog.Site; +import cn.edu.ruc.iir.pard.etcd.dao.SiteDao; +import cn.edu.ruc.iir.pard.exchange.PardExchangeClient; +import cn.edu.ruc.iir.pard.executor.connector.Block; +import cn.edu.ruc.iir.pard.executor.connector.JoinTask; +import cn.edu.ruc.iir.pard.executor.connector.PardResultSet; +import cn.edu.ruc.iir.pard.executor.connector.SendDataTask; +import cn.edu.ruc.iir.pard.executor.connector.Task; +import cn.edu.ruc.iir.pard.planner.Plan; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import 
java.util.logging.Level; +import java.util.logging.Logger; + +public class QueryJobExecutor +{ + private Job queryJob; + private Plan plan = null; + private List tasks = null; + private List sendDataTask = new ArrayList(); + private List joinTask = new ArrayList(); + private List otherTask = new ArrayList(); + private Logger logger = Logger.getLogger(QueryJobExecutor.class.getName()); + private SiteDao siteDao = new SiteDao(); + private Map taskMap = null; + private int realNodeNum = 0; + public QueryJobExecutor(Job job) + { + taskMap = new HashMap(); + queryJob = job; + plan = job.getPlan(); + tasks = job.getTasks(); + init(); + } + public void init() + { + for (Task task : tasks) { + if (task instanceof SendDataTask) { + sendDataTask.add((SendDataTask) task); + continue; + } + if (task instanceof JoinTask) { + joinTask.add((JoinTask) task); + continue; + } + else { + otherTask.add(task); + } + } + } + + private void executeFirstPhase() + { + BlockingQueue blocks = new LinkedBlockingQueue<>(); + for (SendDataTask task : sendDataTask) { + String site = task.getSite(); + String taskId = task.getTaskId(); + Site nodeSite = siteDao.listNodes().get(site); + if (nodeSite == null) { + logger.log(Level.SEVERE, "Node " + site + " is not active. 
Please check."); + //return PardResultSet.execErrResultSet; + } + else { + realNodeNum++; + PardExchangeClient client = new PardExchangeClient(nodeSite.getIp(), nodeSite.getExchangePort()); + client.connect(task, blocks); + taskMap.put(taskId, task); + } + } + while (!taskMap.isEmpty()) { + Block block = null; + try { + block = blocks.poll(8000, TimeUnit.MILLISECONDS); + } + catch (InterruptedException e) { + e.printStackTrace(); + } + if (block == null) { + logger.info("Waiting for more blocks..."); + continue; + } + else { + String taskId = block.getTaskId(); + taskMap.remove(taskId); + logger.info("Task " + taskId + " done."); + } + } + } + + public PardResultSet execute() + { + executeFirstPhase(); + PardResultSet resultSet = new PardResultSet(); + List secondList = new ArrayList(); + secondList.addAll(otherTask); + secondList.addAll(joinTask); + BlockingQueue blocks = new LinkedBlockingQueue<>(); + for (Task task : secondList) { + String site = task.getSite(); + String taskId = task.getTaskId(); + Site nodeSite = siteDao.listNodes().get(site); + if (nodeSite == null) { + logger.log(Level.SEVERE, "Node " + site + " is not active. 
Please check."); + return PardResultSet.execErrResultSet; + } + PardExchangeClient client = new PardExchangeClient(nodeSite.getIp(), nodeSite.getExchangePort()); + client.connect(task, blocks); + taskMap.put(taskId, task); + } + // wait for all tasks done + while (!taskMap.isEmpty()) { + Block block = null; + try { + block = blocks.poll(8000, TimeUnit.MILLISECONDS); + } + catch (InterruptedException e) { + e.printStackTrace(); + } + if (block == null) { + logger.info("Waiting for more blocks..."); + continue; + } + resultSet.addBlock(block); + logger.info("Added block " + block.getSequenceId() + ", num of rows: " + block.getRows().size()); + if (!block.isSequenceHasNext()) { + String taskId = block.getTaskId(); + taskMap.remove(taskId); + logger.info("Task " + taskId + " done."); + } + } + plan.afterExecution(true); + return resultSet; + } +} diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/scheduler/TaskScheduler.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/scheduler/TaskScheduler.java index 1ba0146..db7408a 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/scheduler/TaskScheduler.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/scheduler/TaskScheduler.java @@ -4,6 +4,8 @@ import cn.edu.ruc.iir.pard.catalog.Schema; import cn.edu.ruc.iir.pard.catalog.Site; import cn.edu.ruc.iir.pard.catalog.Table; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage.ErrCode; +import cn.edu.ruc.iir.pard.commons.exception.TaskSchedulerException; import cn.edu.ruc.iir.pard.commons.utils.DataType; import cn.edu.ruc.iir.pard.commons.utils.RowConstructor; import cn.edu.ruc.iir.pard.communication.rpc.PardRPCClient; @@ -18,12 +20,18 @@ import cn.edu.ruc.iir.pard.executor.connector.DropSchemaTask; import cn.edu.ruc.iir.pard.executor.connector.DropTableTask; import cn.edu.ruc.iir.pard.executor.connector.InsertIntoTask; +import cn.edu.ruc.iir.pard.executor.connector.JoinTask; import cn.edu.ruc.iir.pard.executor.connector.LoadTask; import 
cn.edu.ruc.iir.pard.executor.connector.PardResultSet; import cn.edu.ruc.iir.pard.executor.connector.QueryTask; +import cn.edu.ruc.iir.pard.executor.connector.SendDataTask; import cn.edu.ruc.iir.pard.executor.connector.Task; +import cn.edu.ruc.iir.pard.executor.connector.node.FilterNode; +import cn.edu.ruc.iir.pard.executor.connector.node.JoinNode; import cn.edu.ruc.iir.pard.executor.connector.node.NodeHelper; +import cn.edu.ruc.iir.pard.executor.connector.node.OutputNode; import cn.edu.ruc.iir.pard.executor.connector.node.PlanNode; +import cn.edu.ruc.iir.pard.executor.connector.node.ProjectNode; import cn.edu.ruc.iir.pard.executor.connector.node.TableScanNode; import cn.edu.ruc.iir.pard.executor.connector.node.UnionNode; import cn.edu.ruc.iir.pard.planner.Plan; @@ -39,13 +47,19 @@ import cn.edu.ruc.iir.pard.planner.dml.LoadPlan; import cn.edu.ruc.iir.pard.planner.dml.QueryPlan; import cn.edu.ruc.iir.pard.server.PardStartupHook; +import cn.edu.ruc.iir.pard.sql.expr.ColumnItem; import cn.edu.ruc.iir.pard.sql.expr.Expr; +import cn.edu.ruc.iir.pard.sql.expr.Expr.LogicOperator; +import cn.edu.ruc.iir.pard.sql.expr.SingleExpr; +import cn.edu.ruc.iir.pard.sql.expr.TrueExpr; +import cn.edu.ruc.iir.pard.sql.tree.ComparisonExpression; import cn.edu.ruc.iir.pard.sql.tree.Expression; import cn.edu.ruc.iir.pard.sql.tree.Row; import com.google.common.collect.ImmutableList; import java.util.ArrayList; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -54,6 +68,7 @@ import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.logging.Level; import java.util.logging.Logger; @@ -267,56 +282,642 @@ public List generateTasks(Plan plan) // query plan if (plan instanceof QueryPlan) { - logger.info("Task generation for query plan"); + QueryPlan queryPlan = (QueryPlan) plan; 
try { - QueryPlan queryPlan = (QueryPlan) plan; - PlanNode planNode = queryPlan.getPlan(); - PlanNode currentNode = planNode; - UnionNode internalUnionNode = null; - while (currentNode.hasChildren()) { - currentNode = currentNode.getLeftChild(); - if (currentNode instanceof UnionNode) { - internalUnionNode = (UnionNode) currentNode; - break; + return processQueryPlan2(queryPlan); + } + catch (Exception e) { + e.printStackTrace(); + } + } + return null; + } + + public List processQueryPlan2(QueryPlan queryPlan) + { + logger.info("Task generation for query plan"); + PlanNode planNode = queryPlan.getPlan(); + ProjectNode proj = null; + PlanNode currentNode = planNode; + UnionNode internalUnionNode = null; + JoinNode joinNode = null; + while (currentNode != null) { + if (currentNode instanceof UnionNode) { + internalUnionNode = (UnionNode) currentNode; + } + if (currentNode instanceof ProjectNode) { + proj = (ProjectNode) currentNode; + } + if (currentNode instanceof JoinNode) { + joinNode = (JoinNode) currentNode; + } + currentNode = currentNode.getLeftChild(); + } + if (internalUnionNode == null && joinNode == null) { + return ImmutableList.of(new QueryTask(planNode)); + } + else if (joinNode != null) { + return ImmutableList.copyOf(processJoinTask(joinNode, proj, queryPlan.getJobId(), new AtomicInteger(0))); + } + else { + return ImmutableList.copyOf(processUnionTask(internalUnionNode, queryPlan.getJobId(), new AtomicInteger(1), proj)); + } + } + public QueryTask singleSiteTableTask(PlanNode node, String jobId, AtomicInteger jobOffset) + { + TableScanNode tableScanNode = null; + PlanNode root = node; + if (node instanceof TableScanNode) { + tableScanNode = (TableScanNode) node; + } + while (!(node instanceof TableScanNode) && node.hasChildren()) { + if (node.getLeftChild() instanceof TableScanNode) { + tableScanNode = (TableScanNode) node.getLeftChild(); + break; + } + node = node.getLeftChild(); + } + if (tableScanNode == null) { + return null; + } + QueryTask task 
= new QueryTask(tableScanNode.getSite(), NodeHelper.copyNode(root)); + task.setTaskId(jobId + "-" + jobOffset.addAndGet(1)); + return task; + } + public static class Pointer + { + private T value; + + public Pointer(T value) + { + super(); + this.value = value; + } + public T getValue() + { + return value; + } + + public void setValue(T value) + { + this.value = value; + } + public boolean isEmpty() + { + return value == null; + } + } + public TableScanNode getTableScanNode(PlanNode node) + { + while (node != null) { + if (node instanceof TableScanNode) { + return (TableScanNode) node; + } + node = node.getLeftChild(); + } + return null; + } + public ProjectNode getProjectNode(PlanNode node) + { + while (node != null) { + if (node instanceof ProjectNode) { + return (ProjectNode) node; + } + node = node.getLeftChild(); + } + return null; + } + public Expr getTableFilterNode(PlanNode node) + { + while (node != null) { + if (node instanceof FilterNode) { + return Expr.parse(((FilterNode) node).getExpression()); + } + node = node.getLeftChild(); + } + return new TrueExpr(); + } + //TODO: add projection. 
+ public PlanNode setFilterAlias(PlanNode node, String alias, String oldTableName) + { + node = NodeHelper.copyNode(node); + PlanNode root = node; + PlanNode pNode = null; + do { + PlanNode oldNode = node; + PlanNode o = null; + if (node instanceof FilterNode) { + FilterNode filter = (FilterNode) node; + Expr expr = Expr.parse(filter.getExpression()); + expr = Expr.replaceTableName(expr, oldTableName, alias); + filter = new FilterNode(expr.toExpression()); + o = filter; + } + if (oldNode == root) { + root = node; + } + if (o != null) { + o.setChildren(node.getLeftChild(), true, true); + node = o; + if (pNode != null) { + pNode.setChildren(node, true, true); + } + } + pNode = node; + node = node.getLeftChild(); + }while (node != null); + return root; + } + public PlanNode setAlias(PlanNode node, String alias, String oldTableName, String realTableName) + { + //System.out.println(node); + node = NodeHelper.copyNode(node); + PlanNode root = node; + PlanNode pNode = null; + do { + PlanNode oldNode = node; + PlanNode o = null; + if (node instanceof TableScanNode) { + TableScanNode scan = (TableScanNode) node; + if (scan.getTable().equals(oldTableName)) { + scan = new TableScanNode(scan.getSchema(), realTableName, scan.getSite()); + scan.setAlias(alias); + } + scan.setChildren(node.getLeftChild(), true, true); + if (pNode != null) { + pNode.setChildren(scan, true, true); + } + node = scan; + } + else if (node instanceof ProjectNode) { + List colList = new ArrayList(); + for (Column col : ((ProjectNode) node).getColumns()) { + col = new Column(col); + //System.out.println(col.getTableName() + " " + oldTableName + " " + alias); + if (col.getTableName().equals(oldTableName)) { + col.setTableName(alias); } + colList.add(col); + } + ProjectNode p = new ProjectNode(colList); + p.setChildren(node.getLeftChild(), true, true); + if (pNode != null) { + pNode.setChildren(p, true, true); + } + node = p; + } + else if (node instanceof FilterNode) { + FilterNode filter = (FilterNode) 
node; + Expr expr = Expr.parse(filter.getExpression()); + expr = Expr.replaceTableName(expr, oldTableName, alias); + filter = new FilterNode(expr.toExpression()); + filter.setChildren(node.getLeftChild(), true, true); + if (pNode != null) { + pNode.setChildren(filter, true, true); + } + node = filter; + } + if (oldNode == root) { + root = node; + } + pNode = node; + node = node.getLeftChild(); + }while (node != null); + //System.out.println(root); + return root; + } + public void processJoinTask(JoinNode node, Map joinMap, Map sendDataMap, List otherTask, Pointer joinTableName, Pointer dataTableName, String jobId, AtomicInteger jobOffset, ProjectNode proj, String randomString) + { + PlanNode joinNode = null; + PlanNode dataNode = null; + TableScanNode joinTable = null; + TableScanNode dataTable = null; + Expr joinExpr = null; + Expr dataExpr = null; + + PlanNode left = node.getJoinChildren().get(0); + //System.out.println("left" + left); + PlanNode right = node.getJoinChildren().get(1); + //System.out.println("right" + right); + TableScanNode leftTable = getTableScanNode(left); + TableScanNode rightTable = getTableScanNode(right); + if (leftTable == null || rightTable == null) { + throw new TaskSchedulerException(ErrCode.UnSupportedQuery, " two or more table participate one join"); + } + if (joinTableName.isEmpty() || dataTableName.isEmpty()) { + joinTableName.setValue(leftTable.getTable()); + dataTableName.setValue(rightTable.getTable()); + } + if (leftTable.getTable().equals(joinTableName.getValue()) && rightTable.getTable().equals(dataTableName.getValue())) { + joinNode = left; + dataNode = right; + joinTable = leftTable; + dataTable = rightTable; + } + else if ((rightTable.getTable().equals(joinTableName.getValue()) && leftTable.getTable().equals(dataTableName.getValue()))) { + joinNode = right; + dataNode = left; + joinTable = rightTable; + dataTable = leftTable; + } + else { + throw new TaskSchedulerException(ErrCode.UnSupportedQuery, "below one union has 
more than one group of joins."); + } + //System.out.println("joinNode1" + joinNode); + joinExpr = getTableFilterNode(joinNode); + dataExpr = getTableFilterNode(dataNode); + SendDataTask dataTask = sendDataMap.get(dataTable.getSite()); + + Expr dataTaskExpr = extractTableExpr(joinExpr, dataExpr, dataTableName.getValue(), node); + Expr joinTaskSingleTableExpr = extractTableExpr(joinExpr, dataExpr, joinTableName.getValue(), node); + String tmpTableName = "tmp_" + dataTableName.getValue() + "_" + jobId + "_" + joinTable.getSite(); + tmpTableName = tmpTableName.replace('-', '_').replace('-', '_').replace('-', '_'); + tmpTableName += randomString; + String tmpTableAlias = dataTableName.getValue() + "p0"; + while (tmpTableName.contains(" ")) { + tmpTableName = tmpTableName.replace(" ", ""); + } + boolean needDropTable = true; +// if (dataTable.getSite().equals(joinTable.getSite())) { + //位于相同站点,不需要发数据 +// tmpTableName = dataTableName.getValue(); +// needDropTable = false; + // } +// else { + if (dataTask == null) { + dataTask = new SendDataTask(dataTable.getSite()); + dataTask.setSchemaName(dataTable.getTable()); + PlanNode p = new OutputNode(); + p.setChildren(NodeHelper.copyNode(dataNode), true, true); + dataTask.setNode(orFilterNode(p, dataTaskExpr)); + dataTask.getSiteExpression().put(joinTable.getSite(), dataTaskExpr.toExpression()); + dataTask.setTaskId(jobId + "_" + jobOffset.addAndGet(1)); + dataTask.getTmpTableMap().put(joinTable.getSite(), tmpTableName); + sendDataMap.put(dataTable.getSite(), dataTask); + } + else { + PlanNode p = dataTask.getNode(); + dataTask.setNode(orFilterNode(p, dataTaskExpr)); + dataTask.getSiteExpression().put(joinTable.getSite(), dataTaskExpr.toExpression()); + dataTask.getTmpTableMap().put(joinTable.getSite(), tmpTableName); + } +// } + JoinTask joinTask = joinMap.get(joinTable.getSite()); + if (joinTask == null) { + //Do sth. 
+ Iterator iter = node.getJoinSet().iterator(); + String common = null; + if (iter.hasNext()) { + common = iter.next(); + } + /* + if ((!node.getJoinSet().isEmpty()) && node.getExprList().isEmpty()) { + if (!col.getColumnName().equals(common)) { + col.setTableName(null); } - if (internalUnionNode == null) { - return ImmutableList.of(new QueryTask(planNode)); + }*/ + //设置为实际表的名字 但是因为别名的存在 所以目前不需要 + //if (dataTableName.getValue().equals(col.getTableName())) { + // col.setTableName(tmpTableName); + //} + JoinNode join = new JoinNode(); + join.getJoinSet().addAll(node.getJoinSet()); + //join.getExprList().addAll(node.getExprList()); + for (Expression expr : node.getExprList()) { + Expr e = Expr.parse(expr); + e = Expr.replaceTableName(e, dataTableName.getValue(), tmpTableAlias); + join.getExprList().add((ComparisonExpression) e.toExpression()); + } + //TODO: add children. + join.addJoinChild(NodeHelper.copyNode(joinNode)); + //System.out.println("joinNode1" + joinNode); + //join.addJoinChild(orFilterNode(NodeHelper.copyNode(joinNode), joinTaskSingleTableExpr, dataTableName.getValue(), tmpTableName)); + PlanNode dataCopy = NodeHelper.copyNode(dataNode); + dataCopy = setAlias(dataCopy, tmpTableAlias, dataTableName.getValue(), tmpTableName); + /* + PlanNode tmp = dataCopy; + if (dataCopy instanceof TableScanNode) { + TableScanNode scan = (TableScanNode) dataCopy; + dataCopy = new TableScanNode(scan.getSchema(), tmpTableName, joinTable.getSite()); + ((TableScanNode) dataCopy).setAlias(dataTableName.getValue()); + } + else { + while (tmp.hasChildren()) { + if (tmp.getLeftChild() instanceof TableScanNode) { + TableScanNode scan = (TableScanNode) tmp.getLeftChild(); + scan = new TableScanNode(scan.getSchema(), tmpTableName, joinTable.getSite()); + scan.setAlias(dataTableName.getValue()); + tmp.setChildren(scan, true, false); + break; + } + tmp = tmp.getLeftChild(); } - List tasks = new ArrayList<>(); - List unionChildren = internalUnionNode.getUnionChildren(); - int index = 
0; - for (PlanNode childNode : unionChildren) { - internalUnionNode.setChildren(childNode, true, false); - PlanNode node = childNode; - TableScanNode tableScanNode = null; - if (node instanceof TableScanNode) { - tableScanNode = (TableScanNode) node; + }*/ + join.addJoinChild(dataCopy); + joinTask = new JoinTask(joinTable.getSite()); + if (needDropTable) { + joinTask.setTmpTableName(tmpTableName); + } + else { + joinTask.setTmpTableName(null); + } + joinTask.setTaskId(jobId + "_" + jobOffset.addAndGet(1)); + PlanNode output = new OutputNode(); + ProjectNode leftProject = getProjectNode(join.getJoinChildren().get(0)); + ProjectNode rightProject = getProjectNode(join.getJoinChildren().get(1)); + List clist = new ArrayList(); + for (Column c : proj.getColumns()) { + boolean hit = false; + for (Column lc : leftProject.getColumns()) { + if (lc.getColumnName().equals(c.getColumnName())) { + clist.add(lc); + hit = true; + break; } - while (!(node instanceof TableScanNode) && node.hasChildren()) { - if (node.getLeftChild() instanceof TableScanNode) { - tableScanNode = (TableScanNode) node.getLeftChild(); + } + if (!hit) { + for (Column rc : rightProject.getColumns()) { + if (rc.getColumnName().equals(c.getColumnName())) { + clist.add(rc); break; } - node = node.getLeftChild(); } - if (tableScanNode == null) { - return null; + } + } + PlanNode p = new ProjectNode(clist); + output.setChildren(p, true, true); + p.setChildren(join, true, true); + joinTask.setNode(output); + //System.out.println("joinNode1" + p); + joinMap.put(joinTable.getSite(), joinTask); + } + else { + PlanNode p = joinTask.getNode(); + p = NodeHelper.copyNode(p); + PlanNode root = p; + while (p.hasChildren()) { + if (p.getLeftChild() instanceof JoinNode) { + JoinNode jnode = (JoinNode) p.getLeftChild(); + //System.out.println(); + //System.out.println(jnode.getJoinChildren().size() + "aaaaaaaaa"); + PlanNode pnode = jnode.getJoinChildren().get(0); + pnode = orFilterNode(NodeHelper.copyNode(pnode), 
joinTaskSingleTableExpr); + jnode.getJoinChildren().set(0, pnode); + break; + } + p = p.getLeftChild(); + } + joinTask.setNode(root); + } + } + public PlanNode orFilterNode(PlanNode node, Expr filterExpr) + { + //System.out.println(node == null); + node = NodeHelper.copyNode(node); + //System.out.println(node == null); + PlanNode root = node; + while (node.hasChildren()) { + if (node.getLeftChild() instanceof FilterNode) { + FilterNode children = (FilterNode) node.getLeftChild(); + Expr e1 = Expr.parse(children.getExpression()); + //if (oldTableName != null) { + //System.out.println("before replace e1 " + e1.toString() + "from " + oldTableName + " to" + newTableName); + //e1 = Expr.replaceTableName(e1); + //filterExpr = Expr.replaceTableName(filterExpr, oldTableName, newTableName); + //System.out.println("after replace e1 " + e1.toString()); + //} + e1 = Expr.or(e1, filterExpr, LogicOperator.AND); + children.setExpression(e1.toExpression()); + return root; + } + node = node.getLeftChild(); + if (node == null) { + break; + } + } + return root; + } + public Expr extractTableExpr(Expr joinExpr, Expr dataExpr, String tableName, JoinNode node) + { + List tasks = new ArrayList<>(); + Expr e = Expr.and(joinExpr, dataExpr, LogicOperator.AND); + if (node.getExprList().size() == 0) { + return new TrueExpr(); + } + SingleExpr se = (SingleExpr) Expr.parse(node.getExprList().get(0)); + ColumnItem lv = (ColumnItem) se.getLvalue(); + ColumnItem rv = (ColumnItem) se.getRvalue(); + if (lv.getTableName().equalsIgnoreCase(tableName)) { + Expr opt = Expr.replace(e, rv, lv); + opt = Expr.extractTableFilter(opt, tableName); + return Expr.optimize(opt, LogicOperator.AND); + } + else if (rv.getTableName().equalsIgnoreCase(tableName)) { + Expr opt = Expr.replace(e, lv, rv); + opt = Expr.extractTableFilter(opt, tableName); + return Expr.optimize(opt, LogicOperator.AND); + } + else { + throw new TaskSchedulerException(ErrCode.ParseError, "expression that can replace"); + } + } + public List 
processJoinTask(JoinNode node, ProjectNode proj, String jobId, AtomicInteger jobOffset) + { + List tasks = new ArrayList<>(); + Pointer joinTableName = new Pointer(null); + Pointer dataTableName = new Pointer(null); + Map sendDataMap = new HashMap(); + Map joinMap = new HashMap(); + List otherTask = new ArrayList(); + String randomString = "_" + (int) (Math.random() * Integer.MAX_VALUE); + processJoinTask((JoinNode) node, joinMap, sendDataMap, otherTask, joinTableName, dataTableName, jobId, jobOffset, proj, randomString); + tasks.addAll(otherTask); + tasks.addAll(sendDataMap.values()); + // tasks.addAll(joinMap.values()); + tasks.addAll(joinMap.values()); + return tasks; + } + public List processUnionTask(UnionNode union, String jobId, AtomicInteger jobOffset, ProjectNode proj) + { + List tasks = new ArrayList<>(); + List unionChildren = union.getUnionChildren(); + //List sendDataList = new ArrayList(); + //List joinList = new ArrayList(); + List otherTask = new ArrayList(); + //int index = jobOffset; + Map sendDataMap = new HashMap(); + Map joinMap = new HashMap(); + Pointer joinTableName = new Pointer(null); + Pointer dataTableName = new Pointer(null); + String randomString = "_" + (int) (Math.random() * Integer.MAX_VALUE); + for (PlanNode childNode : unionChildren) { + //union.setChildren(childNode, true, false); + PlanNode node = childNode; + while (node != null) { + if (node instanceof TableScanNode) { + PlanNode p = new OutputNode(); + PlanNode root = p; + if (!(childNode instanceof ProjectNode)) { + PlanNode projection = NodeHelper.copyNode(proj); + if (projection != null) { + p.setChildren(projection, true, true); + p = projection; + } + } + p.setChildren(childNode, true, true); + root = NodeHelper.copyNode(root); + Task tableTask = singleSiteTableTask(root, jobId, jobOffset); + if (tableTask != null) { + otherTask.add(tableTask); } - QueryTask task = new QueryTask(tableScanNode.getSite(), NodeHelper.copyNode(planNode)); - task.setTaskId(plan.getJobId() + 
"-" + index); - tasks.add(task); - index++; + break; + } + else if (node instanceof JoinNode && ((JoinNode) node).getJoinChildren().size() == 2) { + //需要收集task的site,确定主表从表 + processJoinTask((JoinNode) node, joinMap, sendDataMap, otherTask, joinTableName, dataTableName, jobId, jobOffset, proj, randomString); } - return ImmutableList.copyOf(tasks); + else if (node instanceof JoinNode && ((JoinNode) node).getJoinChildren().size() == 1) { + node = ((JoinNode) node).getJoinChildren().get(0); + //don't need to do anything. + } + else if (node instanceof JoinNode) { + throw new TaskSchedulerException(ErrCode.UnSupportedQuery, " join node has more than one children"); + } + node = node.getLeftChild(); } - catch (Exception e) { - e.printStackTrace(); + } + tasks.addAll(otherTask); + tasks.addAll(sendDataMap.values()); + // tasks.addAll(joinMap.values()); + tasks.addAll(joinMap.values()); + //joinMap.values().forEach(tasks::add); + return tasks; + } + public List processQueryPlan(QueryPlan queryPlan) + { + logger.info("Task generation for query plan"); + PlanNode planNode = queryPlan.getPlan(); + PlanNode currentNode = planNode; + UnionNode internalUnionNode = null; + while (currentNode.hasChildren()) { + currentNode = currentNode.getLeftChild(); + if (currentNode instanceof UnionNode) { + internalUnionNode = (UnionNode) currentNode; + break; + } + } + if (internalUnionNode == null) { + return ImmutableList.of(new QueryTask(planNode)); + } + List tasks = new ArrayList<>(); + List unionChildren = internalUnionNode.getUnionChildren(); + int index = 0; + for (PlanNode childNode : unionChildren) { + internalUnionNode.setChildren(childNode, true, false); + PlanNode node = childNode; + TableScanNode tableScanNode = null; + if (node instanceof TableScanNode) { + tableScanNode = (TableScanNode) node; + } + while (!(node instanceof TableScanNode) && node.hasChildren()) { + if (node.getLeftChild() instanceof TableScanNode) { + tableScanNode = (TableScanNode) node.getLeftChild(); + 
break; + } + node = node.getLeftChild(); + } + if (tableScanNode == null) { + return null; } + QueryTask task = new QueryTask(tableScanNode.getSite(), NodeHelper.copyNode(planNode)); + task.setTaskId(queryPlan.getJobId() + "-" + index); + tasks.add(task); + index++; + } + return ImmutableList.copyOf(tasks); + } + public TaskState executeQueryTask(List tasks, boolean isLocal) + { + if (isLocal) { + PardResultSet resultSet = new PardResultSet(); + Map taskMap = new HashMap<>(); + BlockingQueue blocks = new LinkedBlockingQueue<>(); + TaskState state = new TaskState(taskMap, blocks); + state.setResultSet(resultSet); + for (Task task : tasks) { + String site = task.getSite(); + String taskId = task.getTaskId(); + Site nodeSite = siteDao.listNodes().get(site); + if (nodeSite == null) { + logger.log(Level.SEVERE, "Node " + site + " is not active. Please check."); + state.setResultSet(PardResultSet.execErrResultSet); + return state; + } + PardExchangeClient client = new PardExchangeClient(nodeSite.getIp(), nodeSite.getExchangePort()); + client.connect(task, blocks); + taskMap.put(taskId, task); + } + return state; } return null; } - + /* + public PardResultSet executeQueryPlanJob(Job job, QueryPlan plan, List tasks) + { + logger.info("Executing query tasks for job[" + job.getJobId() + "]"); + PardResultSet resultSet = new PardResultSet(); + Map taskMap = new HashMap<>(); + BlockingQueue blocks = new LinkedBlockingQueue<>(); + List sendDataTask = new ArrayList(); + List joinTask = new ArrayList(); + List otherTask = new ArrayList(); + for (Task task : tasks) { + if (task instanceof SendDataTask) { + sendDataTask.add((SendDataTask) task); + continue; + } + if (task instanceof JoinTask) { + joinTask.add((JoinTask) task); + continue; + } + else { + otherTask.add(task); + } + String site = task.getSite(); + String taskId = task.getTaskId(); + Site nodeSite = siteDao.listNodes().get(site); + if (nodeSite == null) { + logger.log(Level.SEVERE, "Node " + site + " is not active. 
Please check."); + return PardResultSet.execErrResultSet; + } + PardExchangeClient client = new PardExchangeClient(nodeSite.getIp(), nodeSite.getExchangePort()); + client.connect(task, blocks); + taskMap.put(taskId, task); + } + // wait for all tasks done + while (!taskMap.isEmpty()) { + Block block = null; + try { + block = blocks.poll(8000, TimeUnit.MILLISECONDS); + } + catch (InterruptedException e) { + e.printStackTrace(); + } + if (block == null) { + logger.info("Waiting for more blocks..."); + continue; + } + resultSet.addBlock(block); + logger.info("Added block " + block.getSequenceId() + ", num of rows: " + block.getRows().size()); + if (!block.isSequenceHasNext()) { + String taskId = block.getTaskId(); + taskMap.remove(taskId); + logger.info("Task " + taskId + " done."); + } + } + plan.afterExecution(true); + return resultSet; + }*/ // todo this sucks, full of if else public PardResultSet executeJob(Job job) { @@ -338,7 +939,7 @@ public PardResultSet executeJob(Job job) if (plan instanceof SchemaShowPlan) { SchemaDao schemaDao = new SchemaDao(); Set schemas = schemaDao.listAll(); - Column header = new Column(0, DataType.VARCHAR.getType(), "schema", 100, 0, 0); + Column header = new Column(0, DataType.VARCHAR.getType(), "schema", 100, 0, 0, null); PardResultSet resultSet = new PardResultSet(PardResultSet.ResultStatus.OK, ImmutableList.of(header)); for (String schemaName : schemas) { RowConstructor rowConstructor = new RowConstructor(); @@ -352,7 +953,7 @@ public PardResultSet executeJob(Job job) SchemaDao schemaDao = new SchemaDao(); Schema schema = schemaDao.loadByName(((TableShowPlan) plan).getSchema()); List
tables = schema.getTableList(); - Column header = new Column(0, DataType.VARCHAR.getType(), "table", 100, 0, 0); + Column header = new Column(0, DataType.VARCHAR.getType(), "table", 100, 0, 0, null); PardResultSet resultSet = new PardResultSet(PardResultSet.ResultStatus.OK, ImmutableList.of(header)); for (Table table : tables) { RowConstructor rowConstructor = new RowConstructor(); @@ -413,45 +1014,7 @@ public PardResultSet executeJob(Job job) // this is a simplest implementation // todo collected result set form exchange client shall be passed on for next query stage if (plan instanceof QueryPlan) { - logger.info("Executing query tasks for job[" + job.getJobId() + "]"); - PardResultSet resultSet = new PardResultSet(); - Map taskMap = new HashMap<>(); - BlockingQueue blocks = new LinkedBlockingQueue<>(); - for (Task task : tasks) { - String site = task.getSite(); - String taskId = task.getTaskId(); - Site nodeSite = siteDao.listNodes().get(site); - if (nodeSite == null) { - logger.log(Level.SEVERE, "Node " + site + " is not active. 
Please check."); - return PardResultSet.execErrResultSet; - } - PardExchangeClient client = new PardExchangeClient(nodeSite.getIp(), nodeSite.getExchangePort()); - client.connect(task, blocks); - taskMap.put(taskId, task); - } - // wait for all tasks done - while (!taskMap.isEmpty()) { - Block block = null; - try { - block = blocks.poll(8000, TimeUnit.MILLISECONDS); - } - catch (InterruptedException e) { - e.printStackTrace(); - } - if (block == null) { - logger.info("Waiting for more blocks..."); - continue; - } - resultSet.addBlock(block); - logger.info("Added block " + block.getSequenceId() + ", num of rows: " + block.getRows().size()); - if (!block.isSequenceHasNext()) { - String taskId = block.getTaskId(); - taskMap.remove(taskId); - logger.info("Task " + taskId + " done."); - } - } - plan.afterExecution(true); - return resultSet; + return new QueryJobExecutor(job).execute(); } // delete diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/scheduler/TaskState.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/scheduler/TaskState.java new file mode 100644 index 0000000..3845f6d --- /dev/null +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/scheduler/TaskState.java @@ -0,0 +1,71 @@ +package cn.edu.ruc.iir.pard.scheduler; + +import cn.edu.ruc.iir.pard.executor.connector.Block; +import cn.edu.ruc.iir.pard.executor.connector.PardResultSet; +import cn.edu.ruc.iir.pard.executor.connector.Task; + +import java.util.Map; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.TimeUnit; + +public class TaskState +{ + private Map taskMap = null; + private BlockingQueue blocks = null; + private PardResultSet resultSet = null; + public TaskState(Map taskMap, BlockingQueue blocks) + { + super(); + this.taskMap = taskMap; + this.blocks = blocks; + } + public Map getTaskMap() + { + return taskMap; + } + public void setTaskMap(Map taskMap) + { + this.taskMap = taskMap; + } + public BlockingQueue getBlocks() + { + return blocks; + } + public int available() 
+ { + return blocks.size(); + } + public Block fetch() + { + Block block = null; + do { + try { + block = blocks.poll(8000, TimeUnit.MILLISECONDS); + } + catch (InterruptedException e) { + e.printStackTrace(); + } + }while (block == null && !isDone()); + if (!block.isSequenceHasNext()) { + String taskId = block.getTaskId(); + taskMap.remove(taskId); + } + return block; + } + public void setBlocks(BlockingQueue blocks) + { + this.blocks = blocks; + } + public PardResultSet getResultSet() + { + return resultSet; + } + public void setResultSet(PardResultSet resultSet) + { + this.resultSet = resultSet; + } + public boolean isDone() + { + return taskMap.isEmpty(); + } +} diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/server/PardQueryHandler.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/server/PardQueryHandler.java index c58655e..0fdfceb 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/server/PardQueryHandler.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/server/PardQueryHandler.java @@ -1,6 +1,9 @@ package cn.edu.ruc.iir.pard.server; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; import cn.edu.ruc.iir.pard.commons.exception.ParsingException; +import cn.edu.ruc.iir.pard.commons.exception.SemanticException; +import cn.edu.ruc.iir.pard.commons.exception.TaskSchedulerException; import cn.edu.ruc.iir.pard.executor.connector.PardResultSet; import cn.edu.ruc.iir.pard.executor.connector.Task; import cn.edu.ruc.iir.pard.planner.PardPlanner; @@ -8,7 +11,6 @@ import cn.edu.ruc.iir.pard.scheduler.Job; import cn.edu.ruc.iir.pard.scheduler.JobScheduler; import cn.edu.ruc.iir.pard.scheduler.TaskScheduler; -import cn.edu.ruc.iir.pard.semantic.SemanticException; import cn.edu.ruc.iir.pard.sql.parser.SqlParser; import cn.edu.ruc.iir.pard.sql.tree.Statement; @@ -56,10 +58,19 @@ public PardQueryHandler( @Override public void run() { + int c = 0; try (BufferedReader input = new BufferedReader( new InputStreamReader(socket.getInputStream()))) { 
while (true) { String line = input.readLine(); + if (line == null) { + //logger.info("Empty line"); + c++; + if (c > 10) { + break; + } + continue; + } if (line.equalsIgnoreCase("EXIT") || line.equalsIgnoreCase("QUIT")) { logger.info("CLIENT QUIT"); @@ -96,6 +107,9 @@ public PardResultSet executeQuery(String sql) catch (ParsingException e) { return new PardResultSet(PardResultSet.ResultStatus.PARSING_ERR); } + catch (NullPointerException e1) { + return new PardResultSet(PardResultSet.ResultStatus.PARSING_ERR, e1.getMessage()); + } if (statement == null) { jobScheduler.failJob(job.getJobId()); logger.log(Level.WARNING, "Cannot create statement for sql: " + sql); @@ -106,27 +120,39 @@ public PardResultSet executeQuery(String sql) logger.info("Created statement for job[" + job.getJobId() + "], job state: " + job.getJobState()); Plan plan = null; + ErrorMessage msg = ErrorMessage.getOKMessage(); try { plan = planner.plan(statement); } catch (SemanticException e) { logger.log(Level.WARNING, e.getSemanticErrorMessage().toString()); + msg = e.getSemanticErrorMessage(); + if (msg == null) { + msg = ErrorMessage.getOKMessage(); + } } if (plan == null) { jobScheduler.failJob(job.getJobId()); logger.log(Level.WARNING, "Cannot create plan for sql: " + sql); - return new PardResultSet(PardResultSet.ResultStatus.PLANNING_ERR); + return new PardResultSet(PardResultSet.ResultStatus.PLANNING_ERR, msg.getErrmsg()); } job.setPlan(plan); plan.setJobId(job.getJobId()); jobScheduler.updateJob(job.getJobId()); logger.info("Created plan for job[" + job.getJobId() + "], job state: " + job.getJobState()); - List tasks = taskScheduler.generateTasks(plan); + List tasks = null; + String taskMsg = null; + try { + tasks = taskScheduler.generateTasks(plan); + } + catch (TaskSchedulerException e) { + taskMsg = e.getPardErrorMessage().toString(); + } if (tasks == null) { jobScheduler.failJob(job.getJobId()); logger.log(Level.WARNING, "Cannot create tasks for sql: " + sql); - return new 
PardResultSet(PardResultSet.ResultStatus.SCHEDULING_ERR); + return new PardResultSet(PardResultSet.ResultStatus.SCHEDULING_ERR, taskMsg); } if (!tasks.isEmpty()) { tasks.forEach(job::addTask); @@ -135,6 +161,9 @@ public PardResultSet executeQuery(String sql) logger.info("Generated tasks for job[" + job.getJobId() + "], job state: " + job.getJobState()); PardResultSet resultSet = taskScheduler.executeJob(job); + if (plan.getMsg() != null) { + resultSet.setSemanticErrmsg(plan.getMsg().toString()); + } if (resultSet.getStatus() != PardResultSet.ResultStatus.OK) { jobScheduler.failJob(job.getJobId()); logger.log(Level.WARNING, "Failed to execute job for sql: " + sql); diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/server/PardWebServer.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/server/PardWebServer.java index 0249fa7..0d0fce7 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/server/PardWebServer.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/server/PardWebServer.java @@ -32,7 +32,15 @@ public void run() context.addServlet(staticHolder, "*.css"); context.addServlet(staticHolder, "*.jpg"); context.setResourceBase("."); - ServletHolder pardHolder = new ServletHolder(new PardServlet()); + PardServlet s = new PardServlet(); + //try { + // s.test(); + //} + //catch (ServletException e1) { + // TODO Auto-generated catch block + // e1.printStackTrace(); + //} + ServletHolder pardHolder = new ServletHolder(s); context.addServlet(pardHolder, "*.pard"); //context.setParentLoaderPriority(true); jettyServer.setHandler(context); diff --git a/pard-main/src/main/java/cn/edu/ruc/iir/pard/web/PardServlet.java b/pard-main/src/main/java/cn/edu/ruc/iir/pard/web/PardServlet.java index d5320af..b909c13 100644 --- a/pard-main/src/main/java/cn/edu/ruc/iir/pard/web/PardServlet.java +++ b/pard-main/src/main/java/cn/edu/ruc/iir/pard/web/PardServlet.java @@ -4,8 +4,9 @@ import cn.edu.ruc.iir.pard.executor.connector.node.NodeHelper; import 
cn.edu.ruc.iir.pard.executor.connector.node.OutputNode; import cn.edu.ruc.iir.pard.executor.connector.node.PlanNode; -import cn.edu.ruc.iir.pard.planner.PardPlanner; +import cn.edu.ruc.iir.pard.planner.ddl.UsePlan; import cn.edu.ruc.iir.pard.planner.dml.QueryPlan; +import cn.edu.ruc.iir.pard.planner.dml.QueryPlan2; import cn.edu.ruc.iir.pard.sql.parser.SqlParser; import cn.edu.ruc.iir.pard.sql.tree.Statement; import net.sf.json.JSONObject; @@ -18,7 +19,6 @@ import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; -import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; @@ -36,12 +36,27 @@ public class PardServlet * */ public void test() throws ServletException { - PardPlanner planner = new PardPlanner(); + //PardPlanner planner = new PardPlanner(); SqlParser parser = new SqlParser(); Statement stmt = parser.createStatement("SELECT * FROM pardtest.emp where eno < 'E0010' and eno > 'E0000'"); - planner.plan(stmt).afterExecution(true); - stmt = parser.createStatement("SELECT * FROM pardtest.emp@pard3"); - planner.plan(stmt).afterExecution(true); + //plan(stmt).afterExecution(true); + //stmt = parser.createStatement("SELECT * FROM pardtest.emp@pard3"); + // plan(stmt).afterExecution(true); + UsePlan.setCurrentSchema("book"); + stmt = parser.createStatement("select Book.title,Book.copies,Publisher.name,Publisher.nation from Book,Publisher where Book.publisher_id=Publisher.id and Publisher.nation='USA' and Book.copies > 1000"); + plan(stmt).afterExecution(true); + //stmt = parser.createStatement("select * from Customer where id<3 and rank >1"); + // plan(stmt).afterExecution(true); + // stmt = parser.createStatement("select id,rank from Customer where id<3 and rank >1"); + //plan(stmt).afterExecution(true); + stmt = parser.createStatement("select customer.name, orders.quantity, book.title from customer,orders,book where customer.id=orders.customer_id and book.id=orders.book_id and customer.rank=1 and 
book.copies>5000"); + plan(stmt).afterExecution(true); + } + public QueryPlan plan(Statement stmt) + { + QueryPlan plan = new QueryPlan2(stmt); + //plan.afterExecution(true); + return plan; } public PNode parse(PlanNode pnode) { @@ -49,8 +64,10 @@ public PNode parse(PlanNode pnode) pn.setKey(pnode.getName() + (++keyGen)); StringBuilder sb = new StringBuilder(); Map map = NodeHelper.getPlanNodeInfo(pnode); - for (String key : map.keySet()) { - sb.append(key).append(":").append(map.get(key)).append("\n"); + if (map != null && map.keySet() != null) { + for (String key : map.keySet()) { + sb.append(key).append(":").append(map.get(key)).append("\n"); + } } pn.setText(sb.toString()); pn.setFigure("Rectangle"); @@ -100,31 +117,111 @@ public String getListBody() throws IOException System.out.println("sb " + sb.toString()); return body; } - + public int giveX(int nodeNo, int maxCount, int xInc, int levelCount) + { + /* + int offset = (maxCount - levelCount) / 2; + offset += nodeNo; + offset -= 1; + offset = offset * xInc; + return offset;*/ + int realInc = maxCount / levelCount; + int offset = nodeNo * realInc; + offset -= realInc / 2; + offset = offset * xInc; + return offset; + } + public int giveY(int levelNo, int yInc) + { + return (levelNo - 1) * yInc; + } public String getBody(QueryPlan plan) throws IOException { List nodeDataArray = new ArrayList(); List linkedDataArray = new ArrayList(); - PlanNode node = ((QueryPlan) plan).optimize(); - Map mapping = new HashMap(); + PlanNode node = ((QueryPlan) plan).getPlan(); + //Map mapping = new HashMap(); Queue que = new LinkedList(); + + //Map nodeLevel = new HashMap<>(); + //Map nodeNumber = new HashMap<>(); + Queue nodeLevelQue = new LinkedList(); + Queue nodeLevelQue2 = new LinkedList(); + Queue nodeNumberQueue = new LinkedList(); + //Queue nodeNumberQueue2 = new LinkedList(); + int[] nodeNo = new int[100]; + que.add(node); + //nodeLevel.put(node, 1); + nodeLevelQue.add(1); + nodeLevelQue2.add(1); + nodeNo[1]++; + int 
maxLevel = 1; + int maxNumber = 1; + //nodeNumber.put(node, nodeNo[1]); + nodeNumberQueue.add(nodeNo[1]); + //nodeNumberQueue2.add(nodeNo[1]); + while (!que.isEmpty()) { + PlanNode planNode = que.poll(); + int level = nodeLevelQue.poll(); //nodeLevel.get(planNode); + if (level > maxLevel) { + maxLevel = level; + } + level++; + List children = NodeHelper.getChildren(planNode); + for (PlanNode pn : children) { + if (pn == null) { + continue; + } + que.add(pn); + //nodeLevel.put(pn, level); + nodeLevelQue.add(level); + nodeLevelQue2.add(level); + nodeNo[level]++; + //nodeNumber.put(pn, nodeNo[level]); + nodeNumberQueue.add(nodeNo[level]); + } + } + for (int i = 0; i < maxLevel; i++) { + if (nodeNo[i] > maxNumber) { + maxNumber = nodeNo[i]; + } + } que.add(node); PNode pa = parse(node); - pa.locx = 0; - pa.locy = 60; - mapping.put(node, pa); + int xInc = 270; + int yInc = 200; + pa.locx = giveX(1, maxNumber, xInc, 1); + pa.locy = 60 + giveY(1, yInc); + //mapping.put(node, pa); nodeDataArray.add(pa); + Queue pque = new LinkedList(); + pque.add(pa); + nodeLevelQue2.poll(); + nodeNumberQueue.poll(); while (!que.isEmpty()) { PlanNode planNode = que.poll(); - PNode parent = mapping.get(planNode); + PNode parent = pque.poll(); + //PNode parent = mapping.get(planNode); List pnlist = NodeHelper.getChildren(planNode); int xoffset = 0; for (PlanNode pnode : pnlist) { + int level = nodeLevelQue2.poll(); //nodeLevel.get(pnode); + int levelNo = nodeNumberQueue.poll(); //nodeNumber.get(pnode); PNode p = parse(pnode); - p.locx = parent.locx + xoffset * 320; + pque.add(p); + p.locx = giveX(levelNo, maxNumber, xInc, nodeNo[level]); //parent.locx + xoffset * 220; + + if (pnlist.size() == 1) { + p.locx = parent.locx; + } + /* + else if (pnlist.size() == 2) { + p.locx = parent.locx - xInc / 2 + xoffset * xInc; + xoffset++; + }*/ xoffset++; - p.locy = parent.locy + 150; - mapping.put(pnode, p); + p.locy = giveY(level, yInc); + //mapping.put(pnode, p); PEdge e = new PEdge(); e.from = 
parent.getKey(); e.to = p.getKey(); diff --git a/pard-main/src/main/resources/webapp/_demo.html b/pard-main/src/main/resources/webapp/_demo.html index 7fad5c2..a7fde0d 100644 --- a/pard-main/src/main/resources/webapp/_demo.html +++ b/pard-main/src/main/resources/webapp/_demo.html @@ -21,7 +21,7 @@ - +
@@ -73,8 +73,8 @@
- -
+ +
diff --git a/pard-main/src/test/java/cn/edu/ruc/iir/pard/TestQueryHandler.java b/pard-main/src/test/java/cn/edu/ruc/iir/pard/TestQueryHandler.java index 9dbd902..eb5c11c 100644 --- a/pard-main/src/test/java/cn/edu/ruc/iir/pard/TestQueryHandler.java +++ b/pard-main/src/test/java/cn/edu/ruc/iir/pard/TestQueryHandler.java @@ -19,7 +19,7 @@ public void executeQuery() JobScheduler scheduler = JobScheduler.INSTANCE(); TaskScheduler taskScheduler = TaskScheduler.INSTANCE(); PardQueryHandler handler = new PardQueryHandler(null, scheduler, taskScheduler); - String sql = "load \"/Users/Jelly/Downloads/eval_db/book.tsv\" into book.book"; + String sql = "delete from booktest.customer where 1=1"; PardResultSet resultSet = handler.executeQuery(sql); System.out.println(resultSet); } diff --git a/pard-main/src/test/java/cn/edu/ruc/iir/pard/catelog/CatelogTest.java b/pard-main/src/test/java/cn/edu/ruc/iir/pard/catelog/CatelogTest.java index 0d99f6b..7aa09e7 100644 --- a/pard-main/src/test/java/cn/edu/ruc/iir/pard/catelog/CatelogTest.java +++ b/pard-main/src/test/java/cn/edu/ruc/iir/pard/catelog/CatelogTest.java @@ -3,10 +3,10 @@ import cn.edu.ruc.iir.pard.catalog.Column; import cn.edu.ruc.iir.pard.catalog.Schema; import cn.edu.ruc.iir.pard.catalog.Table; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; import cn.edu.ruc.iir.pard.etcd.dao.SchemaDao; import cn.edu.ruc.iir.pard.etcd.dao.SiteDao; import cn.edu.ruc.iir.pard.etcd.dao.TableDao; -import cn.edu.ruc.iir.pard.planner.ErrorMessage; import cn.edu.ruc.iir.pard.planner.PardPlanner; import cn.edu.ruc.iir.pard.planner.Plan; import cn.edu.ruc.iir.pard.planner.ddl.TableCreationPlan; diff --git a/pard-main/src/test/java/cn/edu/ruc/iir/pard/scheduler/TaskSchedulerTest.java b/pard-main/src/test/java/cn/edu/ruc/iir/pard/scheduler/TaskSchedulerTest.java new file mode 100644 index 0000000..5b59eab --- /dev/null +++ b/pard-main/src/test/java/cn/edu/ruc/iir/pard/scheduler/TaskSchedulerTest.java @@ -0,0 +1,104 @@ +package 
cn.edu.ruc.iir.pard.scheduler; + +import cn.edu.ruc.iir.pard.catalog.Column; +import cn.edu.ruc.iir.pard.executor.connector.Task; +import cn.edu.ruc.iir.pard.planner.PardPlanner; +import cn.edu.ruc.iir.pard.planner.Plan; +import cn.edu.ruc.iir.pard.planner.ddl.UsePlan; +import cn.edu.ruc.iir.pard.scheduler.JobScheduler.JobState; +import cn.edu.ruc.iir.pard.sql.parser.SqlParser; +import cn.edu.ruc.iir.pard.sql.tree.ComparisonExpression; +import cn.edu.ruc.iir.pard.sql.tree.Expression; +import cn.edu.ruc.iir.pard.sql.tree.LogicalBinaryExpression; +import cn.edu.ruc.iir.pard.sql.tree.Statement; +import net.sf.json.JSONArray; +import net.sf.json.JSONObject; +import net.sf.json.JsonConfig; +import net.sf.json.processors.JsonValueProcessor; +import org.testng.annotations.Test; + +import java.util.List; + +public class TaskSchedulerTest +{ + SqlParser parser = new SqlParser(); + @Test + public void test2() + { + UsePlan.setCurrentSchema("book"); + String sql = "select Book.title,Book.copies,Publisher.name,Publisher.nation from Book,Publisher where Book.publisher_id=Publisher.id and Publisher.nation='USA' and Book.copies > 1000"; + Statement stmt = parser.createStatement(sql); + PardPlanner planner = new PardPlanner(); + Plan plan = planner.plan(stmt); + plan.setJobId("aa"); + List task = TaskScheduler.INSTANCE().generateTasks(plan); + JsonConfig config = new JsonConfig(); + config.setExcludes(new String[]{"rightChild"}); + config.registerJsonValueProcessor(Expression.class, new JsonValueProcessor(){ + @Override + public Object processArrayValue(Object arg0, JsonConfig arg1) + { + return arg0.toString(); + } + + @Override + public Object processObjectValue(String arg0, Object arg1, JsonConfig arg2) + { + return arg1.toString(); + } + }); + config.registerJsonValueProcessor(ComparisonExpression.class, new JsonValueProcessor(){ + @Override + public Object processArrayValue(Object arg0, JsonConfig arg1) + { + return arg0.toString(); + } + + @Override + public Object 
processObjectValue(String arg0, Object arg1, JsonConfig arg2) + { + return arg1.toString(); + } + }); + config.registerJsonValueProcessor(LogicalBinaryExpression.class, new JsonValueProcessor(){ + @Override + public Object processArrayValue(Object arg0, JsonConfig arg1) + { + return arg0.toString(); + } + + @Override + public Object processObjectValue(String arg0, Object arg1, JsonConfig arg2) + { + return arg1.toString(); + } + }); + config.registerJsonValueProcessor(Column.class, new JsonValueProcessor(){ + @Override + public Object processArrayValue(Object arg0, JsonConfig arg1) + { + JSONObject obj = new JSONObject(); + Column col = (Column) arg0; + obj.put("columnName", col.getColumnName()); + obj.put("tableName", col.getTableName()); + return obj; + } + + @Override + public Object processObjectValue(String arg0, Object arg1, JsonConfig arg2) + { + JSONObject obj = new JSONObject(); + Column col = (Column) arg1; + obj.put("columnName", col.getColumnName()); + obj.put("tableName", col.getTableName()); + return obj; + } + }); + System.out.println(JSONArray.fromObject(task, config).toString(1)); + Job job = JobScheduler.INSTANCE().newJob(); + task.forEach(job::addTask); + job.setJobState(JobState.EXECUTING); + job.setPlan(plan); + // TaskScheduler.INSTANCE().executeJob(job); + } +} diff --git a/pard-main/src/test/java/cn/edu/ruc/iir/pard/semantic/JoinSemantic.java b/pard-main/src/test/java/cn/edu/ruc/iir/pard/semantic/JoinSemantic.java new file mode 100644 index 0000000..358d6c5 --- /dev/null +++ b/pard-main/src/test/java/cn/edu/ruc/iir/pard/semantic/JoinSemantic.java @@ -0,0 +1,42 @@ +package cn.edu.ruc.iir.pard.semantic; + +import cn.edu.ruc.iir.pard.commons.exception.SemanticException; +import cn.edu.ruc.iir.pard.planner.ddl.UsePlan; +import cn.edu.ruc.iir.pard.planner.dml.QueryPlan; +import cn.edu.ruc.iir.pard.planner.dml.QueryPlan2; +import cn.edu.ruc.iir.pard.sql.parser.SqlParser; +import cn.edu.ruc.iir.pard.sql.tree.Statement; +import 
org.testng.annotations.Test; + +public class JoinSemantic +{ + @Test + public void testSemantic() + { + UsePlan.setCurrentSchema("book"); + String stmt = "select Book.title,Book.copies,Publisher.name,Publisher.nation from Book,Publisher where Book.publisher_id=Publisher.id and Publisher.nation='USA' and Book.copies > 1000"; + SqlParser parser = new SqlParser(); + Statement stmts = parser.createStatement(stmt); + try { + QueryPlan plan = new QueryPlan2(stmts); + } + catch (SemanticException e) { + System.out.println(e.getSemanticErrorMessage()); + e.printStackTrace(); + } + } + @Test + public void testJoin() + { + UsePlan.setCurrentSchema("book"); + SqlParser parser = new SqlParser(); + Statement stmt = parser.createStatement("select customer.name, orders.quantity, book.title from customer,orders,book where customer.id=orders.id and book.id=orders.book_id and customer.rank=1 and book.copies>5000"); + plan(stmt); + } + public QueryPlan plan(Statement stmt) + { + QueryPlan plan = new QueryPlan2(stmt); + //plan.afterExecution(true); + return plan; + } +} diff --git a/pard-main/src/test/java/cn/edu/ruc/iir/pard/semantic/SemanticAnalysisTest.java b/pard-main/src/test/java/cn/edu/ruc/iir/pard/semantic/SemanticAnalysisTest.java index 44c9608..786aa61 100644 --- a/pard-main/src/test/java/cn/edu/ruc/iir/pard/semantic/SemanticAnalysisTest.java +++ b/pard-main/src/test/java/cn/edu/ruc/iir/pard/semantic/SemanticAnalysisTest.java @@ -1,5 +1,6 @@ package cn.edu.ruc.iir.pard.semantic; +import cn.edu.ruc.iir.pard.commons.exception.ErrorMessage; import cn.edu.ruc.iir.pard.etcd.dao.TableDao; import cn.edu.ruc.iir.pard.executor.connector.node.AggregationNode; import cn.edu.ruc.iir.pard.executor.connector.node.DistinctNode; @@ -14,12 +15,12 @@ import cn.edu.ruc.iir.pard.executor.connector.node.SortNode; import cn.edu.ruc.iir.pard.executor.connector.node.TableScanNode; import cn.edu.ruc.iir.pard.executor.connector.node.UnionNode; -import cn.edu.ruc.iir.pard.planner.ErrorMessage; import 
cn.edu.ruc.iir.pard.planner.PardPlanner; import cn.edu.ruc.iir.pard.planner.Plan; import cn.edu.ruc.iir.pard.planner.ddl.TableCreationPlan; import cn.edu.ruc.iir.pard.planner.ddl.UsePlan; import cn.edu.ruc.iir.pard.planner.dml.QueryPlan; +import cn.edu.ruc.iir.pard.planner.dml.QueryPlan2; import cn.edu.ruc.iir.pard.sql.parser.SqlParser; import cn.edu.ruc.iir.pard.sql.tree.Node; import cn.edu.ruc.iir.pard.sql.tree.OrderBy; @@ -235,4 +236,17 @@ else if (node instanceof AggregationNode) { } return null; } + @Test + public void testCount() + { + UsePlan.setCurrentSchema("booktest"); + String cnt = "select count(*) from publisher"; + try { + Statement stmt = parser.createStatement(cnt); + QueryPlan plan = new QueryPlan2(stmt); + } + catch (Exception e) { + e.printStackTrace(); + } + } } diff --git a/pard-main/src/test/java/cn/edu/ruc/iir/pard/server/PardQueryHandlerTest.java b/pard-main/src/test/java/cn/edu/ruc/iir/pard/server/PardQueryHandlerTest.java new file mode 100644 index 0000000..31d144c --- /dev/null +++ b/pard-main/src/test/java/cn/edu/ruc/iir/pard/server/PardQueryHandlerTest.java @@ -0,0 +1,74 @@ +package cn.edu.ruc.iir.pard.server; + +import cn.edu.ruc.iir.pard.executor.connector.JoinTask; +import cn.edu.ruc.iir.pard.executor.connector.PardResultSet; +import cn.edu.ruc.iir.pard.executor.connector.QueryTask; +import cn.edu.ruc.iir.pard.executor.connector.SendDataTask; +import cn.edu.ruc.iir.pard.executor.connector.Task; +import cn.edu.ruc.iir.pard.planner.PardPlanner; +import cn.edu.ruc.iir.pard.planner.Plan; +import cn.edu.ruc.iir.pard.planner.ddl.UsePlan; +import cn.edu.ruc.iir.pard.planner.dml.QueryPlan; +import cn.edu.ruc.iir.pard.planner.dml.QueryTestPlan; +import cn.edu.ruc.iir.pard.scheduler.Job; +import cn.edu.ruc.iir.pard.scheduler.JobScheduler; +import cn.edu.ruc.iir.pard.scheduler.JobScheduler.JobState; +import cn.edu.ruc.iir.pard.scheduler.TaskScheduler; +import cn.edu.ruc.iir.pard.sql.parser.SqlParser; +import 
cn.edu.ruc.iir.pard.sql.tree.Statement; +import cn.edu.ruc.iir.pard.web.PardServlet; +import org.testng.annotations.Test; + +import java.util.List; + +public class PardQueryHandlerTest +{ + SqlParser parser = new SqlParser(); + @Test + public void executeQuery() + { + UsePlan.setCurrentSchema("booktest"); + //String sql = "select Book.title,Book.copies,Publisher.name,Publisher.nation from Book,Publisher where Book.publisher_id=Publisher.id and Publisher.nation='USA' and Book.copies > 1000"; + //String sql = "select * from book@pard0"; + //String sql = "select * from book,orders where book.id=orders.book_id"; + //String sql = "select * from customer"; + String sql = "select customer_id,quantity from orders where quantity<8"; + Statement stmt = parser.createStatement(sql); + PardPlanner planner = new PardPlanner(); + Plan plan = planner.plan(stmt); + plan.setJobId("customer_k"); + QueryPlan qPlan = (QueryPlan) plan; + System.out.println(qPlan.getPlan()); + List task = TaskScheduler.INSTANCE().generateTasks(plan); + PardServlet.planList.add((QueryPlan) plan); + for (Task t : task) { + System.out.println(t.getTaskId()); + if (t instanceof SendDataTask) { + QueryPlan p = new QueryTestPlan(((SendDataTask) t).getNode(), "send_Data_" + t.getTaskId()); + System.out.println(p.getPlan()); + PardServlet.planList.add(p); + } + else if (t instanceof JoinTask) { + QueryPlan p = new QueryTestPlan(((JoinTask) t).getNode(), "Join_" + t.getTaskId()); + System.out.println(p.getPlan()); + PardServlet.planList.add(p); + } + else if (t instanceof QueryTask) { + QueryTask tt = (QueryTask) t; + QueryPlan p = new QueryTestPlan(tt.getPlanNode(), "table_" + t.getTaskId().replace('-', '_')); + System.out.println(p.getPlan()); + PardServlet.planList.add(p); + } + } + System.out.println(PardServlet.planList.size()); + + //PardWebServer.main(new String[0]); + + Job job = JobScheduler.INSTANCE().newJob(); + task.forEach(job::addTask); + job.setJobState(JobState.EXECUTING); + job.setPlan(plan); + 
PardResultSet rs = TaskScheduler.INSTANCE().executeJob(job); + PardWebServer.main(new String[0]); + } +} diff --git a/pard-main/src/test/java/cn/edu/ruc/iir/pard/server/PardWebTest.java b/pard-main/src/test/java/cn/edu/ruc/iir/pard/server/PardWebTest.java new file mode 100644 index 0000000..249488a --- /dev/null +++ b/pard-main/src/test/java/cn/edu/ruc/iir/pard/server/PardWebTest.java @@ -0,0 +1,35 @@ +package cn.edu.ruc.iir.pard.server; + +import cn.edu.ruc.iir.pard.planner.ddl.UsePlan; +import cn.edu.ruc.iir.pard.planner.dml.QueryPlan; +import cn.edu.ruc.iir.pard.planner.dml.QueryPlan2; +import cn.edu.ruc.iir.pard.sql.parser.SqlParser; +import cn.edu.ruc.iir.pard.sql.tree.Statement; +import cn.edu.ruc.iir.pard.web.PardServlet; +import org.testng.annotations.Test; + +public class PardWebTest +{ + @Test + public void test() + { + SqlParser parser = new SqlParser(); + UsePlan.setCurrentSchema("book"); + Statement stmt = parser.createStatement("select Book.title,Book.copies,Publisher.name,Publisher.nation from Book,Publisher where Book.publisher_id=Publisher.id and Publisher.nation='USA' and Book.copies > 1000"); + plan(stmt); + stmt = parser.createStatement("select customer.name, orders.quantity, book.title from customer,orders,book where customer.id=orders.customer_id and book.id=orders.book_id and customer.rank=1 and book.copies>5000"); + plan(stmt); + stmt = parser.createStatement("select * from customer"); + plan(stmt); + stmt = parser.createStatement("select * from publisher"); + plan(stmt); + System.out.println(PardServlet.planList.size()); + PardWebServer.main(new String[0]); + } + public QueryPlan plan(Statement stmt) + { + QueryPlan plan = new QueryPlan2(stmt); + plan.afterExecution(true); + return plan; + } +} diff --git a/pard-optimizer/src/main/java/cn/edu/ruc/iir/pard/sql/expr/ColumnItem.java b/pard-optimizer/src/main/java/cn/edu/ruc/iir/pard/sql/expr/ColumnItem.java index 5f14196..ce65ae1 100644 --- 
a/pard-optimizer/src/main/java/cn/edu/ruc/iir/pard/sql/expr/ColumnItem.java +++ b/pard-optimizer/src/main/java/cn/edu/ruc/iir/pard/sql/expr/ColumnItem.java @@ -42,8 +42,8 @@ public ColumnItem(ColumnItem ci) public ColumnItem(String tableName, String columnName, int dataType) { super(); - this.tableName = tableName; - this.columnName = columnName; + this.tableName = tableName.toLowerCase(); + this.columnName = columnName.toLowerCase(); this.dataType = dataType; } diff --git a/pard-optimizer/src/main/java/cn/edu/ruc/iir/pard/sql/expr/Expr.java b/pard-optimizer/src/main/java/cn/edu/ruc/iir/pard/sql/expr/Expr.java index 5e8e042..b12d019 100644 --- a/pard-optimizer/src/main/java/cn/edu/ruc/iir/pard/sql/expr/Expr.java +++ b/pard-optimizer/src/main/java/cn/edu/ruc/iir/pard/sql/expr/Expr.java @@ -12,7 +12,9 @@ import java.io.Serializable; import java.util.ArrayList; +import java.util.LinkedList; import java.util.List; +import java.util.Queue; public abstract class Expr implements Serializable @@ -53,6 +55,107 @@ public static LogicOperator getReverse(LogicOperator opt) public Expr() { } + public static Expr replace(Expr e1, ColumnItem from, ColumnItem to) + { + Expr e = Expr.clone(e1); + if (e instanceof SingleExpr) { + SingleExpr se = (SingleExpr) e; + Item lv = se.getLvalue(); + Item rv = se.getRvalue(); + if (lv.equals(from)) { + lv = Item.clone(to); + } + if (rv.equals(from)) { + rv = Item.clone(to); + } + return new SingleExpr(lv, rv, se.getCompareType()); + } + else if (e instanceof CompositionExpr) { + CompositionExpr ce = (CompositionExpr) e; + for (int i = 0; i < ce.getConditions().size(); i++) { + Expr ex = ce.getConditions().get(i); + ce.getConditions().set(i, replace(ex, from, to)); + } + return ce; + } + else if (e instanceof UnaryExpr) { + UnaryExpr ue = (UnaryExpr) e; + return new UnaryExpr(ue.getCompareType(), replace(ue.getExpression(), from, to)); + } + else if (e instanceof TrueExpr || e instanceof FalseExpr) { + return e; + } + return e; + } + public 
static Expr replaceTableName(Expr e1, String from, String to) + { + Expr e = Expr.clone(e1); + if (e instanceof SingleExpr) { + System.out.println("from " + e.toString()); + SingleExpr se = (SingleExpr) e; + Item lv = se.getLvalue(); + Item rv = se.getRvalue(); + if (lv instanceof ColumnItem && ((ColumnItem) lv).getTableName().equalsIgnoreCase(from)) { + ColumnItem ci = (ColumnItem) lv; + lv = new ColumnItem(to, ci.getColumnName(), ci.getDataType()); + } + if (rv instanceof ColumnItem && ((ColumnItem) rv).getTableName().equalsIgnoreCase(from)) { + ColumnItem ci = (ColumnItem) rv; + rv = new ColumnItem(to, ci.getColumnName(), ci.getDataType()); + } + se = new SingleExpr(lv, rv, se.getCompareType()); + //System.out.println("to " + se.toString()); + return se; + } + else if (e instanceof CompositionExpr) { + CompositionExpr ce = (CompositionExpr) e; + for (int i = 0; i < ce.getConditions().size(); i++) { + Expr ex = ce.getConditions().get(i); + ce.getConditions().set(i, replaceTableName(ex, from, to)); + } + return ce; + } + else if (e instanceof UnaryExpr) { + UnaryExpr ue = (UnaryExpr) e; + return new UnaryExpr(ue.getCompareType(), replaceTableName(ue.getExpression(), from, to)); + } + else if (e instanceof TrueExpr || e instanceof FalseExpr) { + return e; + } + return e; + } + public static Expr generalReplace(Expr e1, Item from, Item to) + { + Expr e = Expr.clone(e1); + if (e instanceof SingleExpr) { + SingleExpr se = (SingleExpr) e; + Item lv = se.getLvalue(); + Item rv = se.getRvalue(); + if (lv.equals(from)) { + lv = Item.clone(to); + } + if (rv.equals(from)) { + rv = Item.clone(to); + } + return new SingleExpr(lv, rv, se.getCompareType()); + } + else if (e instanceof CompositionExpr) { + CompositionExpr ce = (CompositionExpr) e; + for (int i = 0; i < ce.getConditions().size(); i++) { + Expr ex = ce.getConditions().get(i); + ce.getConditions().set(i, generalReplace(ex, from, to)); + } + return ce; + } + else if (e instanceof UnaryExpr) { + UnaryExpr ue = 
(UnaryExpr) e; + return new UnaryExpr(ue.getCompareType(), generalReplace(ue.getExpression(), from, to)); + } + else if (e instanceof TrueExpr || e instanceof FalseExpr) { + return e; + } + return e; + } public static Expr clone(Expr expr) { if (expr instanceof CompositionExpr) { @@ -84,7 +187,7 @@ private static List extractList(Expr expr, String tableName, boolean rec) SingleExpr se = (SingleExpr) e; if (se.getLvalue() instanceof ColumnItem && se.getRvalue() instanceof ValueItem) { ColumnItem ci = (ColumnItem) se.getLvalue(); - if (tableName != null && tableName.equals(ci.getTableName())) { + if (tableName != null && tableName.equalsIgnoreCase(ci.getTableName())) { list.add(se); } else { @@ -108,6 +211,114 @@ else if (expr instanceof SingleExpr) { } return list; } + // 可能有重复值 + public static List extractTableColumn(Expr expr, String tableName) + { + Expr extractOr = pdAnd.apply(expr); + Queue traverse = new LinkedList(); + Queue output = new LinkedList(); + List out = new ArrayList(); + traverse.add(extractOr); + while (!traverse.isEmpty()) { + Expr pop = traverse.poll(); + if (pop instanceof CompositionExpr) { + traverse.addAll(((CompositionExpr) pop).getConditions()); + } + else if (pop instanceof UnaryExpr) { + traverse.add(((UnaryExpr) pop).getExpression()); + } + else { + output.add((SingleExpr) pop); + } + } + while (!output.isEmpty()) { + SingleExpr se = output.poll(); + Item lv = se.getLvalue(); + Item rv = se.getRvalue(); + if (lv instanceof ColumnItem && tableName.equals(((ColumnItem) lv).getTableName())) { + out.add(((ColumnItem) lv).getColumnName()); + } + if (rv instanceof ColumnItem && tableName.equals(((ColumnItem) rv).getTableName())) { + out.add(((ColumnItem) rv).getColumnName()); + } + } + return out; + } + public static List extractTableJoinExpr(Expr expr) + { + Expr extractOr = pdOr.apply(expr); + Queue traverse = new LinkedList(); + Queue output = new LinkedList(); + List out = new ArrayList(); + traverse.add(extractOr); + while 
(!traverse.isEmpty()) { + Expr pop = traverse.poll(); + if (pop instanceof CompositionExpr) { + traverse.addAll(((CompositionExpr) pop).getConditions()); + } + else if (pop instanceof UnaryExpr) { + traverse.add(((UnaryExpr) pop).getExpression()); + } + else { + output.add((SingleExpr) pop); + } + } + while (!output.isEmpty()) { + SingleExpr se = output.poll(); + Item lv = se.getLvalue(); + Item rv = se.getRvalue(); + if (lv instanceof ColumnItem && rv instanceof ColumnItem) { + out.add(se); + } + } + return out; + } + //TODO: 从extractTableFilter的结果里提取垂直分片的信息。OK + // 以及提取多表连接的条件 + // 以及 运用表连接条件和分片信息对表达式进一步化简 + public static Expr extractTableColumnFilter(Expr expr, List projectList) + { + expr = pdOr.apply(expr); + //只考虑两种情况,expr为SingleExpr和expr为 and的compositionExpr + CompositionExpr and = new CompositionExpr(LogicOperator.AND); + if (expr instanceof SingleExpr) { + SingleExpr e = (SingleExpr) expr; + ColumnItem ci = null; + if (e.getLvalue() instanceof ColumnItem) { + ci = (ColumnItem) e.getLvalue(); + } + else { + return new TrueExpr(); + } + if (projectList.contains(ci.getColumnName())) { + return expr; + } + } + else { + if (expr instanceof CompositionExpr && ((CompositionExpr) expr).getLogicOperator() == LogicOperator.AND) { + CompositionExpr ce = (CompositionExpr) expr; + for (Expr sub : ce.getConditions()) { + if (sub instanceof SingleExpr) { + SingleExpr e = (SingleExpr) sub; + ColumnItem ci = null; + if (e.getLvalue() instanceof ColumnItem) { + ci = (ColumnItem) e.getLvalue(); + } + if (projectList.contains(ci.getColumnName())) { + and.getConditions().add(sub); + } + } + } + if (and.getConditions().size() == 1) { + return and.getConditions().get(0); + } + else if (and.getConditions().size() > 1) { + return and; + } + } + } + return new TrueExpr(); + } public static Expr extractTableFilter(Expr expr, String tableName) { Expr extractOr = pdAnd.apply(expr); @@ -163,6 +374,15 @@ public static Expr and(Expr e1, Expr e2, LogicOperator opt) Expr and = 
optimize(comp, opt); return and; } + public static Expr or(Expr e1, Expr e2, LogicOperator opt) + { + CompositionExpr comp = new CompositionExpr(LogicOperator.OR); + comp.getConditions().add(e1); + comp.getConditions().add(e2); + comp = PushDownLaw.formatExpr(comp); + Expr and = optimize(comp, opt); + return and; + } public static Expr parse(List conditions, String tableName) { if (conditions.isEmpty()) { diff --git a/pard-optimizer/src/main/java/cn/edu/ruc/iir/pard/sql/expr/Item.java b/pard-optimizer/src/main/java/cn/edu/ruc/iir/pard/sql/expr/Item.java index a7c577c..a49d59a 100644 --- a/pard-optimizer/src/main/java/cn/edu/ruc/iir/pard/sql/expr/Item.java +++ b/pard-optimizer/src/main/java/cn/edu/ruc/iir/pard/sql/expr/Item.java @@ -37,7 +37,7 @@ public static Item parse(Expression expr) DereferenceExpression de = (DereferenceExpression) expr; //System.out.println("base :" + de.getBase().getClass().getName()); //System.out.println("field:" + de.getField()); - ColumnItem ci = new ColumnItem(de.getBase().toString(), de.getField().toString(), 0); + ColumnItem ci = new ColumnItem(de.getBase().toString().toLowerCase(), de.getField().toString().toLowerCase(), 0); ci.expression = expr; return ci; } diff --git a/pard-optimizer/src/test/java/cn/edu/ruc/iir/pard/sql/expr/ExprTest.java b/pard-optimizer/src/test/java/cn/edu/ruc/iir/pard/sql/expr/ExprTest.java index 4a05d71..fb45d37 100644 --- a/pard-optimizer/src/test/java/cn/edu/ruc/iir/pard/sql/expr/ExprTest.java +++ b/pard-optimizer/src/test/java/cn/edu/ruc/iir/pard/sql/expr/ExprTest.java @@ -1,5 +1,9 @@ package cn.edu.ruc.iir.pard.sql.expr; +import cn.edu.ruc.iir.pard.catalog.Column; +import cn.edu.ruc.iir.pard.commons.memory.Row; +import cn.edu.ruc.iir.pard.commons.utils.DataType.DataTypeInt; +import cn.edu.ruc.iir.pard.commons.utils.RowConstructor; import cn.edu.ruc.iir.pard.sql.expr.Expr.LogicOperator; import cn.edu.ruc.iir.pard.sql.expr.rules.ContainEliminateLaw; import cn.edu.ruc.iir.pard.sql.expr.rules.MinimalItemLaw; 
@@ -16,6 +20,8 @@ import cn.edu.ruc.iir.pard.sql.tree.Statement; import org.testng.annotations.Test; +import java.util.ArrayList; +import java.util.List; import java.util.Optional; public class ExprTest @@ -134,4 +140,90 @@ public void justTest() } } } + /** + * A表 id name city age + * */ + @Test + public void testGeneralReplace() + { + SqlParser parser = new SqlParser(); + String expr = "a.id>'E100' and a.age<18"; + Expression expression = parser.createExpression(expr); + RowConstructor constructor = new RowConstructor(); + constructor.appendString("'E101'"); + constructor.appendString("'Geroge'"); + constructor.appendString("'Beijing'"); + constructor.appendInt(12); + Row row = constructor.build(); + String[] col = new String[]{"id", "name", "city", "age"}; + List cols = new ArrayList<>(); + for (int i = 0; i < col.length; i++) { + Column c = new Column(); + c.setColumnName(col[i]); + c.setTableName("a"); + c.setDataType(DataTypeInt.VARCHAR); + if (i == col.length - 1) { + c.setDataType(DataTypeInt.INT); + } + cols.add(c); + } + boolean match = match(expression, row, cols); + System.out.println(match); + expr = "a.id>'E200' and a.age<18"; + expression = parser.createExpression(expr); + match = match(expression, row, cols); + System.out.println(match); + } + + public Boolean match(Expression expr, Row row, List col) + { + List types = new ArrayList(); + col.forEach(x -> types.add(x.getDataType())); + String[] list = RowConstructor.printRow(row, types).split("\t"); + List ciList = new ArrayList(); + List vList = new ArrayList(); + Expr e = Expr.parse(expr); + for (int i = 0; i < list.length; i++) { + ColumnItem ci = new ColumnItem(col.get(i).getTableName(), col.get(i).getColumnName(), col.get(i).getDataType()); + ValueItem vi = new ValueItem(parseFromString(col.get(i).getDataType(), list[i])); + ciList.add(ci); + vList.add(vi); + } + for (int i = 0; i < list.length; i++) { + ColumnItem ci = ciList.get(i); + ValueItem vi = vList.get(i); + e = Expr.generalReplace(e, 
ci, vi); + } + //System.out.println(e.toString()); + e = Expr.optimize(e, LogicOperator.AND); + if (e instanceof TrueExpr) { + return true; + } + else if (e instanceof FalseExpr) { + return false; + } + return null; + } + + public static Comparable parseFromString(int dataType, String value) + { + switch(dataType) { + case DataTypeInt.SMALLINT: + case DataTypeInt.BIGINT: + case DataTypeInt.INT: + return Long.parseLong(value); + case DataTypeInt.FLOAT: + case DataTypeInt.DOUBLE: + return Double.parseDouble(value); + case DataTypeInt.TEXT: + case DataTypeInt.CHAR: + case DataTypeInt.VARCHAR: + return value; + case DataTypeInt.TIME: + case DataTypeInt.DATE: + case DataTypeInt.TIMESTAMP: + return value; + } + return value; + } } diff --git a/pom.xml b/pom.xml index de40462..430ddef 100644 --- a/pom.xml +++ b/pom.xml @@ -24,6 +24,7 @@ + UTF-8 4.6 false true diff --git a/upload.sh b/upload.sh new file mode 100644 index 0000000..548b945 --- /dev/null +++ b/upload.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash + +cd `dirname $0` +cd pard +mvn clean +mvn package +cd .. +scp pard/pard-main/target/pard-server.jar pard@10.77.40.41:/home/pard/pard-src/pard-server.jar +scp pard/pard-client/target/pard-client.jar pard@10.77.40.41:/home/pard/pard-src/pard-client.jar +ssh pard@10.77.40.41 /home/pard/pard-src/deploy_2.sh