diff --git a/include/hidet/runtime/callbacks.h b/include/hidet/runtime/callbacks.h index 3a104ac66..84fe7064c 100644 --- a/include/hidet/runtime/callbacks.h +++ b/include/hidet/runtime/callbacks.h @@ -9,6 +9,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +#pragma once + #include #include diff --git a/include/hidet/runtime/cpu/bfloat16.h b/include/hidet/runtime/cpu/bfloat16.h index 864f41100..44dc93f55 100644 --- a/include/hidet/runtime/cpu/bfloat16.h +++ b/include/hidet/runtime/cpu/bfloat16.h @@ -88,7 +88,7 @@ CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - +#pragma once #include #include #include diff --git a/include/hidet/runtime/cpu/complex.h b/include/hidet/runtime/cpu/complex.h index d1f3bd57e..9e7cad01a 100644 --- a/include/hidet/runtime/cpu/complex.h +++ b/include/hidet/runtime/cpu/complex.h @@ -9,6 +9,8 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. +#pragma once + #include typedef std::complex complex64_t; diff --git a/include/hidet/runtime/cpu/float16.h b/include/hidet/runtime/cpu/float16.h index 42bea52f8..011380edb 100644 --- a/include/hidet/runtime/cpu/float16.h +++ b/include/hidet/runtime/cpu/float16.h @@ -89,6 +89,8 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#pragma once + #include #include #include diff --git a/include/hidet/runtime/cpu/float32.h b/include/hidet/runtime/cpu/float32.h index 762336418..a2de27e72 100644 --- a/include/hidet/runtime/cpu/float32.h +++ b/include/hidet/runtime/cpu/float32.h @@ -9,6 +9,9 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + +#pragma once + #include static inline float rsqrtf(float x) diff --git a/python/hidet/graph/ops/__init__.py b/python/hidet/graph/ops/__init__.py index 85e583309..5f31cc48c 100644 --- a/python/hidet/graph/ops/__init__.py +++ b/python/hidet/graph/ops/__init__.py @@ -10,7 +10,7 @@ # See the License for the specific language governing permissions and # limitations under the License. # pylint: disable=redefined-builtin -from .matmul import batch_matmul, matmul, matmul_x86, matmul_cublas +from .matmul import batch_matmul, matmul, batch_matmul_x86, matmul_cublas from .conv1d import conv1d, conv1d_gemm from .conv1d_transpose import conv1d_transpose from .conv2d import conv2d, conv2d_channel_last, conv2d_winograd, conv2d_gemm, conv2d_gemm_fp16 diff --git a/python/hidet/graph/ops/matmul/__init__.py b/python/hidet/graph/ops/matmul/__init__.py index ae3f4c217..d5ea6210b 100644 --- a/python/hidet/graph/ops/matmul/__init__.py +++ b/python/hidet/graph/ops/matmul/__init__.py @@ -16,4 +16,4 @@ from .matmul_f32_x86 import Matmulx86Op, MatmulF32Taskx86 -from .matmul_f32_x86 import matmul_x86 +from .matmul_f32_x86 import batch_matmul_x86 diff --git a/python/hidet/graph/ops/matmul/matmul_f32_x86.py b/python/hidet/graph/ops/matmul/matmul_f32_x86.py index eeb467b30..7126fd210 100644 --- a/python/hidet/graph/ops/matmul/matmul_f32_x86.py +++ b/python/hidet/graph/ops/matmul/matmul_f32_x86.py @@ -82,7 +82,7 @@ def implement_cpu(self, working_dir: str) -> Union[IRModule, List[IRModule]]: return tune.extract_ir_modules(self.schedule_matmulf32_x86) @tune.space(1, MC=[2016], NC=[256, 384, 512], KC=[384, 512, 560], ways=[(1, 4, 2, 1)]) - def schedule_matmulf32_x86(self, MC=2016, NC=384, KC=560, ways=(1, 4, 2, 1)) -> IRModule: + def schedule_matmulf32_x86(self, MC=2016, NC=384, KC=560, ways=(1, 1, 1, 1)) -> IRModule: import hidet from hidet.ir.type import tensor_type from hidet.lang import tensor, grid, as_tensor_pointer @@ -858,5 +858,5 @@ def __init__(self, a: Tensor, b: Tensor): super().__init__(inputs=[a, b], attributes={}, task=task) -def matmul_x86(a: Tensor, b: Tensor) -> Tensor: +def batch_matmul_x86(a: Tensor, b: Tensor) -> Tensor: return Matmulx86Op(a, b).outputs[0] diff --git a/tests/operators/test_matmul.py b/tests/operators/test_matmul.py index c5c67aa50..00b09de72 100644 --- a/tests/operators/test_matmul.py +++ b/tests/operators/test_matmul.py @@ -26,7 +26,7 @@ def test_matmul_x86(a_shape, b_shape): a_shape, b_shape, lambda x, y: np.matmul(x, y), - lambda x, y: ops.matmul_x86(x, y) - ops.matmul_x86(x, y) + ops.matmul_x86(x, y), + lambda x, y: ops.batch_matmul_x86(x, y) - ops.batch_matmul_x86(x, y) + ops.batch_matmul_x86(x, y), dtype="float32", atol=1e-4, rtol=1e-4,