Skip to content

Common Table Expressions

Konstantin Triger edited this page Aug 15, 2019 · 9 revisions

CTE (Common Table Expression) is one of the less known SQL constructs, especially among Java developers. And this is very unfortunate, since they let effectively query hierarchical data. Without them such queries are more expensive by an order of a magnitude.

From Essential SQL site:

Recursive CTEs are special in the sense they are allowed to reference themselves! Because of this special ability, you can use recursive CTEs to solve problems other queries cannot. Recursive CTEs are really good at working with hierarchical data such as org charts or bill of materials.

Started in Fundamentals, we continue writing tutorial samples with FluentJPA. We start examining non-recursive CTEs first (that make the code much simpler to read, but eventually can be implemented using sub queries). And then continue to recursive CTEs.

A) Simple SQL Server CTE

@Tuple
@Getter
public static class CTESalesAmounts {
    private String staff;
    private float sales;
    private int year;
}

FluentQuery query = FluentJPA.SQL(() -> {

    CTESalesAmounts salesAmounts = subQuery((Order o,
                                             OrderItem oi,
                                             Staff s) -> {
        String staff = alias(s.getFirstName() + " " + s.getLastName(),
                             CTESalesAmounts::getStaff);
        Float sales = alias(
                       SUM(oi.getQuantity() * oi.getListPrice() * (1 - oi.getDiscount())),
                       CTESalesAmounts::getSales);
        Integer year = alias(YEAR(o.getOrderDate()), CTESalesAmounts::getYear);

        SELECT(staff, sales, year);

        FROM(o).JOIN(oi).ON(oi.getOrder() == o).JOIN(s).ON(s == o.getStaff());

        GROUP(BY(staff), BY(year));
    });

    WITH(salesAmounts);

    SELECT(salesAmounts.getStaff(), salesAmounts.getSales());
    FROM(salesAmounts);
    WHERE(salesAmounts.getYear() == 2018);
});

B) ... make report averages based on counts

@Tuple
@Getter
public static class CTESales {
    private int staffId;
    private int orderCount;
}

FluentQuery query = FluentJPA.SQL(() -> {

    CTESales sales = subQuery((Order o) -> {
        Integer staffId = alias(o.getStaff().getId(), CTESales::getStaffId);
        Integer orderCount = alias(COUNT(), CTESales::getOrderCount);

        SELECT(staffId, orderCount);
        FROM(o);
        WHERE(YEAR(o.getOrderDate()) == 2018);
        GROUP(BY(staffId));
    });

    WITH(sales);

    SELECT(alias(AVG(sales.getOrderCount()), "average_orders_by_staff"));
    FROM(sales);
});

C) Using multiple SQL Server CTE

@Tuple
    @Getter
    public static class CTECategorySales {
        private int categoryId;
        private float sales;
    }

    @Tuple
    @Getter
    public static class CTECategoryCounts {
        private int categoryId;
        private String categoryName;
        private int productCount;
    }

FluentQuery query = FluentJPA.SQL(() -> {

    CTECategoryCounts categoryCounts = subQuery((Product p,
                                                 Category cat) -> {

        Integer catId = alias(cat.getId(), CTECategoryCounts::getCategoryId);
        String catName = alias(cat.getName(), CTECategoryCounts::getCategoryName);
        Integer productCount = alias(COUNT(), CTECategoryCounts::getProductCount);

        SELECT(catId, catName, productCount);

        FROM(p).JOIN(cat).ON(p.getCategory() == cat);

        GROUP(BY(catId), BY(catName));
    });

    CTECategorySales categorySales = subQuery((OrderItem oi,
                                               Product p,
                                               Order o) -> {
        Integer catId = alias(p.getCategory().getId(), CTECategorySales::getCategoryId);
        Float sales = alias(
                       SUM(oi.getQuantity() * oi.getListPrice() * (1 - oi.getDiscount())),
                       CTECategorySales::getSales);

        SELECT(catId, sales);

        FROM(oi).JOIN(p).ON(p == oi.getProduct()).JOIN(o).ON(o == oi.getOrder());
        WHERE(o.getStatus() == 4); // completed
        GROUP(BY(catId));
    });

    WITH(categoryCounts, categorySales);

    SELECT(categoryCounts.getCategoryId(), categoryCounts.getCategoryName(),
            categoryCounts.getProductCount(), categorySales.getSales());

    FROM(categoryCounts).JOIN(categorySales)
            .ON(categorySales.getCategoryId() == categoryCounts.getCategoryId());

    GROUP(BY(categoryCounts.getCategoryName()));
});

Oracle SELECT docs - multiple correlated CTEs

Subquery Factoring: Example

References first CTE from the second using byRef() directive.

@Tuple
@Getter
public static class DeptCost {
    private String departmentName;
    private int deptTotal;
}

@Tuple
@Getter
public static class AvgCost {
    private int avg;
}

FluentQuery query = FluentJPA.SQL(() -> {

    DeptCost deptCost = subQuery((Employee e,
                                    Department d) -> {
        Integer deptTotal = alias(SUM(e.getSalary()), DeptCost::getDeptTotal);
        String deptName = alias(d.getDepartmentName(), DeptCost::getDepartmentName);

        SELECT(deptName, deptTotal);
        FROM(e, d);
        WHERE(e.getDepartment() == d);
        GROUP(BY(deptName));
    });

    AvgCost avgCost = subQuery(() -> {

        Integer avg = alias(SUM(deptCost.getDeptTotal()) / COUNT(), AvgCost::getAvg);

        SELECT(avg);
        // at this point FluentJPA is unaware that deptCost will be declared
        // using WITH, and without byRef() will generate a sub select
        FROM(byRef(deptCost));
    });

    WITH(deptCost, avgCost);

    selectAll(deptCost);
    WHERE(deptCost.getDeptTotal() > pick(avgCost, avgCost.getAvg()));
    ORDER(BY(deptCost.getDepartmentName()));

});

Basic Recursive CTE

Since Java lambda can capture only effectively final before lambda declaration variables, it cannot reference cte_numbers (see below). To overcome this limitation FluentJPA introduced recurseOn() directive, which effectively reference the sub query itself. Of course cte_numbers and t variables must be of the same type.

@Tuple
    @Getter
    public static class CTENumbers {
        private int n;
    }

    FluentQuery query = FluentJPA.SQL(() -> {

        CTENumbers cte_numbers = subQuery((CTENumbers t) -> {
            // initial
            SELECT(1);

            UNION_ALL();

            // recursive
            SELECT(t.getN() + 1);
            // recurse on t
            FROM(recurseOn(t));
        });

        WITH(RECURSIVE(viewOf(cte_numbers, CTENumbers::getN)));
        SELECT(cte_numbers.getN());
        FROM(cte_numbers);
        LIMIT(3);
    });

B) ... recursive CTE to query hierarchical data

@Tuple
@Getter
public static class CTEOrg {
    @ManyToOne
    @JoinColumn(name = "staff_id")
    private Staff staff;

    private String firstName;

    @ManyToOne
    @JoinColumn(name = "manager_id")
    private Staff manager;
}

FluentQuery query = FluentJPA.SQL(() -> {

    CTEOrg org = subQuery((CTEOrg it,
                           Staff staffManager,
                           Staff staffSubordinate) -> {
        // initial
        SELECT(staffManager.getId(),
               staffManager.getFirstName(),
               staffManager.getManager());

        FROM(staffManager);

        WHERE(staffManager.getManager() == null);

        UNION_ALL();

        // recursive
        SELECT(staffSubordinate.getId(),
               staffSubordinate.getFirstName(),
               staffSubordinate.getManager());

        // recurse on org
        FROM(staffSubordinate).JOIN(recurseOn(it))
                              .ON(it.getStaff() == staffSubordinate.getManager());
    });

    // in SQL Server or Oracle RECURSIVE modifier is not needed,
    // in PostgreSQL it would be required
    WITH(org);

    SELECT(org);
    FROM(org);

});
Clone this wiki locally