# Source code for pyflink.table.statement_set

################################################################################
#  Licensed to the Apache Software Foundation (ASF) under one
#  or more contributor license agreements.  See the NOTICE file
#  distributed with this work for additional information
#  regarding copyright ownership.  The ASF licenses this file
#  to you under the Apache License, Version 2.0 (the
#  "License"); you may not use this file except in compliance
#  with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
from typing import Union

from pyflink.java_gateway import get_gateway
from pyflink.table import ExplainDetail
from pyflink.table.table_descriptor import TableDescriptor
from pyflink.table.table_result import TableResult
from pyflink.util.java_utils import to_j_explain_detail_arr

__all__ = ['StatementSet']


class StatementSet(object):
    """
    A :class:`~StatementSet` accepts pipelines defined by DML statements or :class:`~Table` objects.
    The planner can optimize all added statements together and then submit them as one job.

    The added statements will be cleared when calling the :func:`~StatementSet.execute` method.

    .. versionadded:: 1.11.0
    """

    def __init__(self, _j_statement_set, t_env):
        # Wrapped Java-side statement set; every public method delegates to it.
        self._j_statement_set = _j_statement_set
        # Owning table environment; its ``_before_execute`` hook is invoked by ``execute``.
        self._t_env = t_env

    def add_insert_sql(self, stmt: str) -> 'StatementSet':
        """
        Adds an insert statement to the set.

        :param stmt: The statement to be added.
        :return: current StatementSet instance.

        .. versionadded:: 1.11.0
        """
        self._j_statement_set.addInsertSql(stmt)
        return self

    def attach_as_datastream(self) -> None:
        """
        Optimizes all statements as one entity and adds them as transformations to the underlying
        StreamExecutionEnvironment.

        Use :func:`~pyflink.datastream.StreamExecutionEnvironment.execute` to execute them.

        The added statements will be cleared after calling this method.

        .. versionadded:: 1.16.0
        """
        self._j_statement_set.attachAsDataStream()

    def add_insert(self,
                   target_path_or_descriptor: Union[str, TableDescriptor],
                   table,
                   overwrite: bool = False) -> 'StatementSet':
        """
        Adds a statement that the pipeline defined by the given Table object should be written to a
        table (backed by a DynamicTableSink) that was registered under the specified path or
        expressed via the given TableDescriptor.

        1. When target_path_or_descriptor is a table path:

            See the documentation of :func:`~TableEnvironment.use_database` or
            :func:`~TableEnvironment.use_catalog` for the rules on the path resolution.

        2. When target_path_or_descriptor is a table descriptor:

            The given TableDescriptor is registered as an inline (i.e. anonymous) temporary catalog
            table (see :func:`~TableEnvironment.create_temporary_table`).

            Then a statement is added to the statement set that inserts the Table object's pipeline
            into that temporary table.

            This method allows to declare a Schema for the sink descriptor. The declaration is
            similar to a ``CREATE TABLE`` DDL in SQL and allows to:

                1. overwrite automatically derived columns with a custom DataType
                2. add metadata columns next to the physical columns
                3. declare a primary key

            It is possible to declare a schema without physical/regular columns. In this case, those
            columns will be automatically derived and implicitly put at the beginning of the schema
            declaration.

            Examples:
            ::

                >>> stmt_set = table_env.create_statement_set()
                >>> source_table = table_env.from_path("SourceTable")
                >>> sink_descriptor = TableDescriptor.for_connector("blackhole") \\
                ...     .schema(Schema.new_builder()
                ...         .build()) \\
                ...     .build()
                >>> stmt_set.add_insert(sink_descriptor, source_table)

            .. note:: add_insert for a table descriptor (case 2.) was added from
                flink 1.14.0.

        :param target_path_or_descriptor: The path of the registered
            :class:`~pyflink.table.TableSink`, or the descriptor describing the sink table, to
            which the :class:`~pyflink.table.Table` is written.
        :param table: The Table to add.
        :type table: pyflink.table.Table
        :param overwrite: Indicates whether the insert should overwrite existing data or not.
        :return: current StatementSet instance.

        .. versionadded:: 1.11.0
        """
        # A string is forwarded as a table path; anything else is treated as a
        # descriptor and unwrapped to the object it holds in ``_j_table_descriptor``.
        if isinstance(target_path_or_descriptor, str):
            j_target = target_path_or_descriptor
        else:
            j_target = target_path_or_descriptor._j_table_descriptor
        self._j_statement_set.addInsert(j_target, table._j_table, overwrite)
        return self

    def explain(self, *extra_details: ExplainDetail) -> str:
        """
        Returns the AST and the execution plan of all statements and Tables.

        :param extra_details: The extra explain details which the explain result should include,
                              e.g. estimated cost, changelog mode for streaming
        :return: The AST and the execution plan of all statements and Tables, in text format.

        .. versionadded:: 1.11.0
        """
        # The explain output is always requested in the TEXT format.
        TEXT = get_gateway().jvm.org.apache.flink.table.api.ExplainFormat.TEXT
        j_extra_details = to_j_explain_detail_arr(extra_details)
        return self._j_statement_set.explain(TEXT, j_extra_details)

    def execute(self) -> TableResult:
        """
        Executes all statements and Tables as a batch.

        .. note::
            The added statements and Tables will be cleared when executing this method.

        :return: execution result.

        .. versionadded:: 1.11.0
        """
        # Run the table environment's pre-execution hook before submitting the statements.
        self._t_env._before_execute()
        return TableResult(self._j_statement_set.execute())